/*
 * Copyright 2014 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 
8 #include "SkBlitRow.h"
9 #include "SkBlitMask.h"
10 #include "SkColorPriv.h"
11 #include "SkDither.h"
12 #include "SkMathPriv.h"
13 
S32_D565_Blend_mips_dsp(uint16_t * SK_RESTRICT dst,const SkPMColor * SK_RESTRICT src,int count,U8CPU alpha,int,int)14 static void S32_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,
15                                     const SkPMColor* SK_RESTRICT src, int count,
16                                     U8CPU alpha, int /*x*/, int /*y*/) {
17     register uint32_t t0, t1, t2, t3, t4, t5, t6;
18     register uint32_t s0, s1, s2, s4, s5, s6;
19 
20     alpha += 1;
21     if (count >= 2) {
22         __asm__ volatile (
23            ".set             push                          \n\t"
24            ".set             noreorder                     \n\t"
25             "sll             %[s4],    %[alpha], 8         \n\t"
26             "or              %[s4],    %[s4],    %[alpha]  \n\t"
27             "repl.ph         %[s5],    0x1f                \n\t"
28             "repl.ph         %[s6],    0x3f                \n\t"
29         "1:                                                \n\t"
30             "lw              %[s2],    0(%[src])           \n\t"
31             "lw              %[s1],    4(%[src])           \n\t"
32             "lwr             %[s0],    0(%[dst])           \n\t"
33             "lwl             %[s0],    3(%[dst])           \n\t"
34             "and             %[t1],    %[s0],    %[s5]     \n\t"
35             "shra.ph         %[t0],    %[s0],    5         \n\t"
36             "and             %[t2],    %[t0],    %[s6]     \n\t"
37 #ifdef __mips_dspr2
38             "shrl.ph         %[t3],    %[s0],    11        \n\t"
39 #else
40             "shra.ph         %[t0],    %[s0],    11        \n\t"
41             "and             %[t3],    %[t0],    %[s5]     \n\t"
42 #endif
43             "precrq.ph.w     %[t0],    %[s1],    %[s2]     \n\t"
44             "shrl.qb         %[t5],    %[t0],    3         \n\t"
45             "and             %[t4],    %[t5],    %[s5]     \n\t"
46             "ins             %[s2],    %[s1],    16, 16    \n\t"
47             "preceu.ph.qbra  %[t0],    %[s2]               \n\t"
48             "shrl.qb         %[t6],    %[t0],    3         \n\t"
49 #ifdef __mips_dspr2
50             "shrl.ph         %[t5],    %[s2],    10        \n\t"
51 #else
52             "shra.ph         %[t0],    %[s2],    10        \n\t"
53             "and             %[t5],    %[t0],    %[s6]     \n\t"
54 #endif
55             "subu.qb         %[t4],    %[t4],    %[t1]     \n\t"
56             "subu.qb         %[t5],    %[t5],    %[t2]     \n\t"
57             "subu.qb         %[t6],    %[t6],    %[t3]     \n\t"
58             "muleu_s.ph.qbr  %[t4],    %[s4],    %[t4]     \n\t"
59             "muleu_s.ph.qbr  %[t5],    %[s4],    %[t5]     \n\t"
60             "muleu_s.ph.qbr  %[t6],    %[s4],    %[t6]     \n\t"
61             "addiu           %[count], %[count], -2        \n\t"
62             "addiu           %[src],   %[src],   8         \n\t"
63             "shra.ph         %[t4],    %[t4],    8         \n\t"
64             "shra.ph         %[t5],    %[t5],    8         \n\t"
65             "shra.ph         %[t6],    %[t6],    8         \n\t"
66             "addu.qb         %[t4],    %[t4],    %[t1]     \n\t"
67             "addu.qb         %[t5],    %[t5],    %[t2]     \n\t"
68             "addu.qb         %[t6],    %[t6],    %[t3]     \n\t"
69             "andi            %[s0],    %[t4],    0xffff    \n\t"
70             "andi            %[t0],    %[t5],    0xffff    \n\t"
71             "sll             %[t0],    %[t0],    0x5       \n\t"
72             "or              %[s0],    %[s0],    %[t0]     \n\t"
73             "sll             %[t0],    %[t6],    0xb       \n\t"
74             "or              %[t0],    %[t0],    %[s0]     \n\t"
75             "sh              %[t0],    0(%[dst])           \n\t"
76             "srl             %[s1],    %[t4],    16        \n\t"
77             "srl             %[t0],    %[t5],    16        \n\t"
78             "sll             %[t5],    %[t0],    5         \n\t"
79             "or              %[t0],    %[t5],    %[s1]     \n\t"
80             "srl             %[s0],    %[t6],    16        \n\t"
81             "sll             %[s2],    %[s0],    0xb       \n\t"
82             "or              %[s1],    %[s2],    %[t0]     \n\t"
83             "sh              %[s1],    2(%[dst])           \n\t"
84             "bge             %[count], 2,        1b        \n\t"
85             " addiu          %[dst],   %[dst],   4         \n\t"
86             ".set            pop                           \n\t"
87             : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
88               [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),
89               [s1]"=&r"(s1), [s2]"=&r"(s2), [s4]"=&r"(s4), [s5]"=&r"(s5),
90               [s6]"=&r"(s6), [count]"+r"(count), [dst]"+r"(dst),
91               [src]"+r"(src)
92             : [alpha]"r"(alpha)
93             : "memory", "hi", "lo"
94         );
95     }
96 
97     if (count == 1) {
98         SkPMColor c = *src++;
99         SkPMColorAssert(c);
100         SkASSERT(SkGetPackedA32(c) == 255);
101         uint16_t d = *dst;
102         *dst++ = SkPackRGB16(SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), alpha),
103                              SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), alpha),
104                              SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), alpha));
105     }
106 }
107 
S32A_D565_Opaque_Dither_mips_dsp(uint16_t * __restrict__ dst,const SkPMColor * __restrict__ src,int count,U8CPU alpha,int x,int y)108 static void S32A_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,
109                                              const SkPMColor* __restrict__ src,
110                                              int count, U8CPU alpha, int x, int y) {
111     __asm__ volatile (
112         "pref  0,   0(%[src])     \n\t"
113         "pref  1,   0(%[dst])     \n\t"
114         "pref  0,   32(%[src])    \n\t"
115         "pref  1,   32(%[dst])    \n\t"
116         :
117         : [src]"r"(src), [dst]"r"(dst)
118         : "memory"
119     );
120 
121     register int32_t t0, t1, t2, t3, t4, t5, t6;
122     register int32_t t7, t8, t9, s0, s1, s2, s3;
123     const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
124 
125     if (count >= 2) {
126         __asm__ volatile (
127             ".set            push                                \n\t"
128             ".set            noreorder                           \n\t"
129             "li              %[s1],    0x01010101                \n\t"
130             "li              %[s2],    -2017                     \n\t"
131         "1:                                                      \n\t"
132             "bnez            %[s3],    4f                        \n\t"
133             " li             %[s3],    2                         \n\t"
134             "pref            0,        64(%[src])                \n\t"
135             "pref            1,        64(%[dst])                \n\t"
136         "4:                                                      \n\t"
137             "addiu           %[s3],    %[s3],    -1              \n\t"
138             "lw              %[t1],    0(%[src])                 \n\t"
139             "andi            %[t3],    %[x],     0x3             \n\t"
140             "addiu           %[x],     %[x],     1               \n\t"
141             "sll             %[t4],    %[t3],    2               \n\t"
142             "srav            %[t5],    %[dither_scan], %[t4]     \n\t"
143             "andi            %[t3],    %[t5],    0xf             \n\t"
144             "lw              %[t2],    4(%[src])                 \n\t"
145             "andi            %[t4],    %[x],     0x3             \n\t"
146             "sll             %[t5],    %[t4],    2               \n\t"
147             "srav            %[t6],    %[dither_scan], %[t5]     \n\t"
148             "addiu           %[x],     %[x],     1               \n\t"
149             "ins             %[t3],    %[t6],    8,    4         \n\t"
150             "srl             %[t4],    %[t1],    24              \n\t"
151             "addiu           %[t0],    %[t4],    1               \n\t"
152             "srl             %[t4],    %[t2],    24              \n\t"
153             "addiu           %[t5],    %[t4],    1               \n\t"
154             "ins             %[t0],    %[t5],    16,   16        \n\t"
155             "muleu_s.ph.qbr  %[t4],    %[t3],    %[t0]           \n\t"
156             "preceu.ph.qbla  %[t3],    %[t4]                     \n\t"
157             "andi            %[t4],    %[t1],    0xff            \n\t"
158             "ins             %[t4],    %[t2],    16,   8         \n\t"
159             "shrl.qb         %[t5],    %[t4],    5               \n\t"
160             "subu.qb         %[t6],    %[t3],    %[t5]           \n\t"
161             "addq.ph         %[t5],    %[t6],    %[t4]           \n\t"
162             "ext             %[t4],    %[t1],    8,    8         \n\t"
163             "srl             %[t6],    %[t2],    8               \n\t"
164             "ins             %[t4],    %[t6],    16,   8         \n\t"
165             "shrl.qb         %[t6],    %[t4],    6               \n\t"
166             "shrl.qb         %[t7],    %[t3],    1               \n\t"
167             "subu.qb         %[t8],    %[t7],    %[t6]           \n\t"
168             "addq.ph         %[t6],    %[t8],    %[t4]           \n\t"
169             "ext             %[t4],    %[t1],    16,   8         \n\t"
170             "srl             %[t7],    %[t2],    16              \n\t"
171             "ins             %[t4],    %[t7],    16,   8         \n\t"
172             "shrl.qb         %[t7],    %[t4],    5               \n\t"
173             "subu.qb         %[t8],    %[t3],    %[t7]           \n\t"
174             "addq.ph         %[t7],    %[t8],    %[t4]           \n\t"
175             "shll.ph         %[t4],    %[t7],    2               \n\t"
176             "andi            %[t9],    %[t4],    0xffff          \n\t"
177             "srl             %[s0],    %[t4],    16              \n\t"
178             "andi            %[t3],    %[t6],    0xffff          \n\t"
179             "srl             %[t4],    %[t6],    16              \n\t"
180             "andi            %[t6],    %[t5],    0xffff          \n\t"
181             "srl             %[t7],    %[t5],    16              \n\t"
182             "subq.ph         %[t5],    %[s1],    %[t0]           \n\t"
183             "srl             %[t0],    %[t5],    3               \n\t"
184             "beqz            %[t1],    3f                        \n\t"
185             " lhu            %[t5],    0(%[dst])                 \n\t"
186             "sll             %[t1],    %[t6],    13              \n\t"
187             "or              %[t8],    %[t9],    %[t1]           \n\t"
188             "sll             %[t1],    %[t3],    24              \n\t"
189             "or              %[t9],    %[t1],    %[t8]           \n\t"
190             "andi            %[t3],    %[t5],    0x7e0           \n\t"
191             "sll             %[t6],    %[t3],    0x10            \n\t"
192             "and             %[t8],    %[s2],    %[t5]           \n\t"
193             "or              %[t5],    %[t6],    %[t8]           \n\t"
194             "andi            %[t6],    %[t0],    0xff            \n\t"
195             "mul             %[t1],    %[t6],    %[t5]           \n\t"
196             "addu            %[t5],    %[t1],    %[t9]           \n\t"
197             "srl             %[t6],    %[t5],    5               \n\t"
198             "and             %[t5],    %[s2],    %[t6]           \n\t"
199             "srl             %[t8],    %[t6],    16              \n\t"
200             "andi            %[t6],    %[t8],    0x7e0           \n\t"
201             "or              %[t1],    %[t5],    %[t6]           \n\t"
202             "sh              %[t1],    0(%[dst])                 \n\t"
203         "3:                                                      \n\t"
204             "beqz            %[t2],    2f                        \n\t"
205             " lhu            %[t5],    2(%[dst])                 \n\t"
206             "sll             %[t1],    %[t7],    13              \n\t"
207             "or              %[t8],    %[s0],    %[t1]           \n\t"
208             "sll             %[t1],    %[t4],    24              \n\t"
209             "or              %[t9],    %[t1],    %[t8]           \n\t"
210             "andi            %[t3],    %[t5],    0x7e0           \n\t"
211             "sll             %[t6],    %[t3],    0x10            \n\t"
212             "and             %[t8],    %[s2],    %[t5]           \n\t"
213             "or              %[t5],    %[t6],    %[t8]           \n\t"
214             "srl             %[t6],    %[t0],    16              \n\t"
215             "mul             %[t1],    %[t6],    %[t5]           \n\t"
216             "addu            %[t5],    %[t1],    %[t9]           \n\t"
217             "srl             %[t6],    %[t5],    5               \n\t"
218             "and             %[t5],    %[s2],    %[t6]           \n\t"
219             "srl             %[t8],    %[t6],    16              \n\t"
220             "andi            %[t6],    %[t8],    0x7e0           \n\t"
221             "or              %[t1],    %[t5],    %[t6]           \n\t"
222             "sh              %[t1],    2(%[dst])                 \n\t"
223         "2:                                                      \n\t"
224             "addiu           %[count], %[count], -2              \n\t"
225             "addiu           %[src],   %[src],   8               \n\t"
226             "addiu           %[t1],    %[count], -1              \n\t"
227             "bgtz            %[t1],    1b                        \n\t"
228             " addiu          %[dst],  %[dst],    4               \n\t"
229             ".set            pop                                 \n\t"
230             : [src]"+r"(src), [count]"+r"(count), [dst]"+r"(dst), [x]"+r"(x),
231               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
232               [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7),
233               [t8]"=&r"(t8),  [t9]"=&r"(t9), [s0]"=&r"(s0), [s1]"=&r"(s1),
234               [s2]"=&r"(s2), [s3]"=&r"(s3)
235             : [dither_scan]"r"(dither_scan)
236             : "memory", "hi", "lo"
237         );
238     }
239 
240     if (count == 1) {
241         SkPMColor c = *src++;
242         SkPMColorAssert(c);
243         if (c) {
244             unsigned a = SkGetPackedA32(c);
245             int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a));
246 
247             unsigned sr = SkGetPackedR32(c);
248             unsigned sg = SkGetPackedG32(c);
249             unsigned sb = SkGetPackedB32(c);
250             sr = SkDITHER_R32_FOR_565(sr, d);
251             sg = SkDITHER_G32_FOR_565(sg, d);
252             sb = SkDITHER_B32_FOR_565(sb, d);
253 
254             uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
255             uint32_t dst_expanded = SkExpand_rgb_16(*dst);
256             dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
257             // now src and dst expanded are in g:11 r:10 x:1 b:10
258             *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
259         }
260         dst += 1;
261         DITHER_INC_X(x);
262     }
263 }
264 
S32_D565_Opaque_Dither_mips_dsp(uint16_t * __restrict__ dst,const SkPMColor * __restrict__ src,int count,U8CPU alpha,int x,int y)265 static void S32_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,
266                                             const SkPMColor* __restrict__ src,
267                                             int count, U8CPU alpha, int x, int y) {
268     uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
269     register uint32_t t0, t1, t2, t3, t4, t5;
270     register uint32_t t6, t7, t8, t9, s0;
271     int dither[4];
272     int i;
273 
274     for (i = 0; i < 4; i++, x++) {
275         dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;
276     }
277 
278     __asm__ volatile (
279         ".set            push                          \n\t"
280         ".set            noreorder                     \n\t"
281         "li              %[s0],    1                   \n\t"
282     "2:                                                \n\t"
283         "beqz            %[count], 1f                  \n\t"
284         " nop                                          \n\t"
285         "addiu           %[t0],    %[count], -1        \n\t"
286         "beqz            %[t0],    1f                  \n\t"
287         " nop                                          \n\t"
288         "beqz            %[s0],    3f                  \n\t"
289         " nop                                          \n\t"
290         "lw              %[t0],    0(%[dither])        \n\t"
291         "lw              %[t1],    4(%[dither])        \n\t"
292         "li              %[s0],    0                   \n\t"
293         "b               4f                            \n\t"
294         " nop                                          \n\t"
295     "3:                                                \n\t"
296         "lw              %[t0],    8(%[dither])        \n\t"
297         "lw              %[t1],    12(%[dither])       \n\t"
298         "li              %[s0],    1                   \n\t"
299     "4:                                                \n\t"
300         "sll             %[t2],    %[t0],    16        \n\t"
301         "or              %[t1],    %[t2],    %[t1]     \n\t"
302         "lw              %[t0],    0(%[src])           \n\t"
303         "lw              %[t2],    4(%[src])           \n\t"
304         "precrq.ph.w     %[t3],    %[t0],    %[t2]     \n\t"
305         "preceu.ph.qbra  %[t9],    %[t3]               \n\t"
306 #ifdef __mips_dspr2
307         "append          %[t0],    %[t2],    16        \n\t"
308         "preceu.ph.qbra  %[t4],    %[t0]               \n\t"
309         "preceu.ph.qbla  %[t5],    %[t0]               \n\t"
310 #else
311         "sll             %[t6],    %[t0],    16        \n\t"
312         "sll             %[t7],    %[t2],    16        \n\t"
313         "precrq.ph.w     %[t8],    %[t6],    %[t7]     \n\t"
314         "preceu.ph.qbra  %[t4],    %[t8]               \n\t"
315         "preceu.ph.qbla  %[t5],    %[t8]               \n\t"
316 #endif
317         "addu.qb         %[t0],    %[t4],    %[t1]     \n\t"
318         "shra.ph         %[t2],    %[t4],    5         \n\t"
319         "subu.qb         %[t3],    %[t0],    %[t2]     \n\t"
320         "shra.ph         %[t6],    %[t3],    3         \n\t"
321         "addu.qb         %[t0],    %[t9],    %[t1]     \n\t"
322         "shra.ph         %[t2],    %[t9],    5         \n\t"
323         "subu.qb         %[t3],    %[t0],    %[t2]     \n\t"
324         "shra.ph         %[t7],    %[t3],    3         \n\t"
325         "shra.ph         %[t0],    %[t1],    1         \n\t"
326         "shra.ph         %[t2],    %[t5],    6         \n\t"
327         "addu.qb         %[t3],    %[t5],    %[t0]     \n\t"
328         "subu.qb         %[t4],    %[t3],    %[t2]     \n\t"
329         "shra.ph         %[t8],    %[t4],    2         \n\t"
330         "precrq.ph.w     %[t0],    %[t6],    %[t7]     \n\t"
331 #ifdef __mips_dspr2
332         "append          %[t6],    %[t7],    16        \n\t"
333 #else
334         "sll             %[t6],    %[t6],    16        \n\t"
335         "sll             %[t2],    %[t7],    16        \n\t"
336         "precrq.ph.w     %[t6],    %[t6],    %[t2]     \n\t"
337 #endif
338         "sra             %[t4],    %[t8],    16        \n\t"
339         "andi            %[t5],    %[t8],    0xFF      \n\t"
340         "sll             %[t7],    %[t4],    5         \n\t"
341         "sra             %[t8],    %[t0],    5         \n\t"
342         "or              %[t9],    %[t7],    %[t8]     \n\t"
343         "or              %[t3],    %[t9],    %[t0]     \n\t"
344         "andi            %[t4],    %[t3],    0xFFFF    \n\t"
345         "sll             %[t7],    %[t5],    5         \n\t"
346         "sra             %[t8],    %[t6],    5         \n\t"
347         "or              %[t9],    %[t7],    %[t8]     \n\t"
348         "or              %[t3],    %[t9],    %[t6]     \n\t"
349         "and             %[t7],    %[t3],    0xFFFF    \n\t"
350         "sh              %[t4],    0(%[dst])           \n\t"
351         "sh              %[t7],    2(%[dst])           \n\t"
352         "addiu           %[count], %[count], -2        \n\t"
353         "addiu           %[src],   %[src],   8         \n\t"
354         "b               2b                            \n\t"
355         " addiu          %[dst],   %[dst],   4         \n\t"
356     "1:                                                \n\t"
357         ".set            pop                           \n\t"
358         : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),
359           [x]"+r"(x), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),
360           [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),
361           [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0)
362         : [dither] "r" (dither)
363         : "memory"
364     );
365 
366     if (count == 1) {
367         SkPMColor c = *src++;
368         SkPMColorAssert(c); // only if DEBUG is turned on
369         SkASSERT(SkGetPackedA32(c) == 255);
370         unsigned dither = DITHER_VALUE(x);
371         *dst++ = SkDitherRGB32To565(c, dither);
372     }
373 }
374 
S32_D565_Blend_Dither_mips_dsp(uint16_t * dst,const SkPMColor * src,int count,U8CPU alpha,int x,int y)375 static void S32_D565_Blend_Dither_mips_dsp(uint16_t* dst,
376                                            const SkPMColor* src,
377                                            int count, U8CPU alpha, int x, int y) {
378     register int32_t t0, t1, t2, t3, t4, t5, t6;
379     register int32_t s0, s1, s2, s3;
380     register int x1 = 0;
381     register uint32_t sc_mul;
382     register uint32_t sc_add;
383 #ifdef ENABLE_DITHER_MATRIX_4X4
384     const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3];
385 #else // ENABLE_DITHER_MATRIX_4X4
386     const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
387 #endif // ENABLE_DITHER_MATRIX_4X4
388     int dither[4];
389 
390     for (int i = 0; i < 4; i++) {
391         dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;
392         x += 1;
393     }
394     alpha += 1;
395     __asm__ volatile (
396         ".set            push                              \n\t"
397         ".set            noreorder                         \n\t"
398         "li              %[t0],     0x100                  \n\t"
399         "subu            %[t0],     %[t0],     %[alpha]    \n\t"
400         "replv.ph        %[sc_mul], %[alpha]               \n\t"
401         "beqz            %[alpha],  1f                     \n\t"
402         " nop                                              \n\t"
403         "replv.qb        %[sc_add], %[t0]                  \n\t"
404         "b               2f                                \n\t"
405         " nop                                              \n\t"
406     "1:                                                    \n\t"
407         "replv.qb        %[sc_add], %[alpha]               \n\t"
408     "2:                                                    \n\t"
409         "addiu           %[t2],     %[count],  -1          \n\t"
410         "blez            %[t2],     3f                     \n\t"
411         " nop                                              \n\t"
412         "lw              %[s0],     0(%[src])              \n\t"
413         "lw              %[s1],     4(%[src])              \n\t"
414         "bnez            %[x1],     4f                     \n\t"
415         " nop                                              \n\t"
416         "lw              %[t0],     0(%[dither])           \n\t"
417         "lw              %[t1],     4(%[dither])           \n\t"
418         "li              %[x1],     1                      \n\t"
419         "b               5f                                \n\t"
420         " nop                                              \n\t"
421     "4:                                                    \n\t"
422         "lw              %[t0],     8(%[dither])           \n\t"
423         "lw              %[t1],     12(%[dither])          \n\t"
424         "li              %[x1],     0                      \n\t"
425     "5:                                                    \n\t"
426         "sll             %[t3],     %[t0],     7           \n\t"
427         "sll             %[t4],     %[t1],     7           \n\t"
428 #ifdef __mips_dspr2
429         "append          %[t0],     %[t1],     16          \n\t"
430 #else
431         "sll             %[t0],     %[t0],     8           \n\t"
432         "sll             %[t2],     %[t1],     8           \n\t"
433         "precrq.qb.ph    %[t0],     %[t0],     %[t2]       \n\t"
434 #endif
435         "precrq.qb.ph    %[t1],     %[t3],     %[t4]       \n\t"
436         "sll             %[t5],     %[s0],     8           \n\t"
437         "sll             %[t6],     %[s1],     8           \n\t"
438         "precrq.qb.ph    %[t4],     %[t5],     %[t6]       \n\t"
439         "precrq.qb.ph    %[t6],     %[s0],     %[s1]       \n\t"
440         "preceu.ph.qbla  %[t5],     %[t4]                  \n\t"
441         "preceu.ph.qbra  %[t4],     %[t4]                  \n\t"
442         "preceu.ph.qbra  %[t6],     %[t6]                  \n\t"
443         "lh              %[t2],     0(%[dst])              \n\t"
444         "lh              %[s1],     2(%[dst])              \n\t"
445 #ifdef __mips_dspr2
446         "append          %[t2],     %[s1],     16          \n\t"
447 #else
448         "sll             %[s1],     %[s1],     16          \n\t"
449         "packrl.ph       %[t2],     %[t2],     %[s1]       \n\t"
450 #endif
451         "shra.ph         %[s1],     %[t2],     11          \n\t"
452         "and             %[s1],     %[s1],     0x1F001F    \n\t"
453         "shra.ph         %[s2],     %[t2],     5           \n\t"
454         "and             %[s2],     %[s2],     0x3F003F    \n\t"
455         "and             %[s3],     %[t2],     0x1F001F    \n\t"
456         "shrl.qb         %[t3],     %[t4],     5           \n\t"
457         "addu.qb         %[t4],     %[t4],     %[t0]       \n\t"
458         "subu.qb         %[t4],     %[t4],     %[t3]       \n\t"
459         "shrl.qb         %[t4],     %[t4],     3           \n\t"
460         "shrl.qb         %[t3],     %[t5],     5           \n\t"
461         "addu.qb         %[t5],     %[t5],     %[t0]       \n\t"
462         "subu.qb         %[t5],     %[t5],     %[t3]       \n\t"
463         "shrl.qb         %[t5],     %[t5],     3           \n\t"
464         "shrl.qb         %[t3],     %[t6],     6           \n\t"
465         "addu.qb         %[t6],     %[t6],     %[t1]       \n\t"
466         "subu.qb         %[t6],     %[t6],     %[t3]       \n\t"
467         "shrl.qb         %[t6],     %[t6],     2           \n\t"
468         "cmpu.lt.qb      %[t4],     %[s1]                  \n\t"
469         "pick.qb         %[s0],     %[sc_add], $0          \n\t"
470         "addu.qb         %[s0],     %[s0],     %[s1]       \n\t"
471         "subu.qb         %[t4],     %[t4],     %[s1]       \n\t"
472         "muleu_s.ph.qbl  %[t0],     %[t4],     %[sc_mul]   \n\t"
473         "muleu_s.ph.qbr  %[t1],     %[t4],     %[sc_mul]   \n\t"
474         "precrq.qb.ph    %[t4],     %[t0],     %[t1]       \n\t"
475         "addu.qb         %[t4],     %[t4],     %[s0]       \n\t"
476         "cmpu.lt.qb      %[t5],     %[s3]                  \n\t"
477         "pick.qb         %[s0],     %[sc_add], $0          \n\t"
478         "addu.qb         %[s0],     %[s0],     %[s3]       \n\t"
479         "subu.qb         %[t5],     %[t5],     %[s3]       \n\t"
480         "muleu_s.ph.qbl  %[t0],     %[t5],     %[sc_mul]   \n\t"
481         "muleu_s.ph.qbr  %[t1],     %[t5],     %[sc_mul]   \n\t"
482         "precrq.qb.ph    %[t5],     %[t0],     %[t1]       \n\t"
483         "addu.qb         %[t5],     %[t5],     %[s0]       \n\t"
484         "cmpu.lt.qb      %[t6],     %[s2]                  \n\t"
485         "pick.qb         %[s0],     %[sc_add], $0          \n\t"
486         "addu.qb         %[s0],     %[s0],     %[s2]       \n\t"
487         "subu.qb         %[t6],     %[t6],     %[s2]       \n\t"
488         "muleu_s.ph.qbl  %[t0],     %[t6],     %[sc_mul]   \n\t"
489         "muleu_s.ph.qbr  %[t1],     %[t6],     %[sc_mul]   \n\t"
490         "precrq.qb.ph    %[t6],     %[t0],     %[t1]       \n\t"
491         "addu.qb         %[t6],     %[t6],     %[s0]       \n\t"
492         "shll.ph         %[s1],     %[t4],     11          \n\t"
493         "shll.ph         %[t0],     %[t6],     5           \n\t"
494         "or              %[s0],     %[s1],     %[t0]       \n\t"
495         "or              %[s1],     %[s0],     %[t5]       \n\t"
496         "srl             %[t2],     %[s1],     16          \n\t"
497         "and             %[t3],     %[s1],     0xFFFF      \n\t"
498         "sh              %[t2],     0(%[dst])              \n\t"
499         "sh              %[t3],     2(%[dst])              \n\t"
500         "addiu           %[src],    %[src],    8           \n\t"
501         "addi            %[count],  %[count],  -2          \n\t"
502         "b               2b                                \n\t"
503         " addu           %[dst],    %[dst],    4           \n\t"
504     "3:                                                    \n\t"
505         ".set            pop                               \n\t"
506         : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
507           [x1]"+r"(x1), [sc_mul]"=&r"(sc_mul), [sc_add]"=&r"(sc_add),
508           [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
509           [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),
510           [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3)
511         : [dither]"r"(dither), [alpha]"r"(alpha)
512         : "memory", "hi", "lo"
513     );
514 
515     if(count == 1) {
516         SkPMColor c = *src++;
517         SkPMColorAssert(c);
518         SkASSERT(SkGetPackedA32(c) == 255);
519         DITHER_565_SCAN(y);
520         int dither = DITHER_VALUE(x);
521         int sr = SkGetPackedR32(c);
522         int sg = SkGetPackedG32(c);
523         int sb = SkGetPackedB32(c);
524         sr = SkDITHER_R32To565(sr, dither);
525         sg = SkDITHER_G32To565(sg, dither);
526         sb = SkDITHER_B32To565(sb, dither);
527 
528         uint16_t d = *dst;
529         *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), alpha),
530                              SkAlphaBlend(sg, SkGetPackedG16(d), alpha),
531                              SkAlphaBlend(sb, SkGetPackedB16(d), alpha));
532         DITHER_INC_X(x);
533     }
534 }
535 
S32A_D565_Opaque_mips_dsp(uint16_t * __restrict__ dst,const SkPMColor * __restrict__ src,int count,U8CPU alpha,int x,int y)536 static void S32A_D565_Opaque_mips_dsp(uint16_t* __restrict__ dst,
537                                       const SkPMColor* __restrict__ src,
538                                       int count, U8CPU alpha, int x, int y) {
539 
540     __asm__ volatile (
541         "pref  0,  0(%[src])     \n\t"
542         "pref  1,  0(%[dst])     \n\t"
543         "pref  0,  32(%[src])    \n\t"
544         "pref  1,  32(%[dst])    \n\t"
545         :
546         : [src]"r"(src), [dst]"r"(dst)
547         : "memory"
548     );
549 
550     register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8;
551     register uint32_t t16;
552     register uint32_t add_x10 = 0x100010;
553     register uint32_t add_x20 = 0x200020;
554     register uint32_t sa = 0xff00ff;
555 
556     __asm__ volatile (
557         ".set           push                            \n\t"
558         ".set           noreorder                       \n\t"
559         "blez           %[count], 1f                    \n\t"
560         " nop                                           \n\t"
561     "2:                                                 \n\t"
562         "beqz           %[count], 1f                    \n\t"
563         " nop                                           \n\t"
564         "addiu          %[t0],    %[count], -1          \n\t"
565         "beqz           %[t0],    1f                    \n\t"
566         " nop                                           \n\t"
567         "bnez           %[t16],   3f                    \n\t"
568         " nop                                           \n\t"
569         "li             %[t16],   2                     \n\t"
570         "pref           0,        64(%[src])            \n\t"
571         "pref           1,        64(%[dst])            \n\t"
572     "3:                                                 \n\t"
573         "addiu          %[t16],   %[t16],   -1          \n\t"
574         "lw             %[t0],    0(%[src])             \n\t"
575         "lw             %[t1],    4(%[src])             \n\t"
576         "precrq.ph.w    %[t2],    %[t0],    %[t1]       \n\t"
577         "preceu.ph.qbra %[t8],    %[t2]                 \n\t"
578 #ifdef __mips_dspr2
579         "append         %[t0],    %[t1],    16          \n\t"
580 #else
581         "sll            %[t0],    %[t0],    16          \n\t"
582         "sll            %[t6],    %[t1],    16          \n\t"
583         "precrq.ph.w    %[t0],    %[t0],    %[t6]       \n\t"
584 #endif
585         "preceu.ph.qbra %[t3],    %[t0]                 \n\t"
586         "preceu.ph.qbla %[t4],    %[t0]                 \n\t"
587         "preceu.ph.qbla %[t0],    %[t2]                 \n\t"
588         "subq.ph        %[t1],    %[sa],    %[t0]       \n\t"
589         "sra            %[t2],    %[t1],    8           \n\t"
590         "or             %[t5],    %[t2],    %[t1]       \n\t"
591         "replv.ph       %[t2],    %[t5]                 \n\t"
592         "lh             %[t0],    0(%[dst])             \n\t"
593         "lh             %[t1],    2(%[dst])             \n\t"
594         "and            %[t1],    %[t1],    0xffff      \n\t"
595 #ifdef __mips_dspr2
596         "append         %[t0],    %[t1],    16          \n\t"
597 #else
598         "sll            %[t5],    %[t0],    16          \n\t"
599         "or             %[t0],    %[t5],    %[t1]       \n\t"
600 #endif
601         "and            %[t1],    %[t0],    0x1f001f    \n\t"
602         "shra.ph        %[t6],    %[t0],    11          \n\t"
603         "and            %[t6],    %[t6],    0x1f001f    \n\t"
604         "and            %[t7],    %[t0],    0x7e007e0   \n\t"
605         "shra.ph        %[t5],    %[t7],    5           \n\t"
606         "muleu_s.ph.qbl %[t0],    %[t2],    %[t6]       \n\t"
607         "addq.ph        %[t7],    %[t0],    %[add_x10]  \n\t"
608         "shra.ph        %[t6],    %[t7],    5           \n\t"
609         "addq.ph        %[t6],    %[t7],    %[t6]       \n\t"
610         "shra.ph        %[t0],    %[t6],    5           \n\t"
611         "addq.ph        %[t7],    %[t0],    %[t3]       \n\t"
612         "shra.ph        %[t6],    %[t7],    3           \n\t"
613         "muleu_s.ph.qbl %[t0],    %[t2],    %[t1]       \n\t"
614         "addq.ph        %[t7],    %[t0],    %[add_x10]  \n\t"
615         "shra.ph        %[t0],    %[t7],    5           \n\t"
616         "addq.ph        %[t7],    %[t7],    %[t0]       \n\t"
617         "shra.ph        %[t0],    %[t7],    5           \n\t"
618         "addq.ph        %[t7],    %[t0],    %[t8]       \n\t"
619         "shra.ph        %[t3],    %[t7],    3           \n\t"
620         "muleu_s.ph.qbl %[t0],    %[t2],    %[t5]       \n\t"
621         "addq.ph        %[t7],    %[t0],    %[add_x20]  \n\t"
622         "shra.ph        %[t0],    %[t7],    6           \n\t"
623         "addq.ph        %[t8],    %[t7],    %[t0]       \n\t"
624         "shra.ph        %[t0],    %[t8],    6           \n\t"
625         "addq.ph        %[t7],    %[t0],    %[t4]       \n\t"
626         "shra.ph        %[t8],    %[t7],    2           \n\t"
627         "shll.ph        %[t0],    %[t8],    5           \n\t"
628         "shll.ph        %[t1],    %[t6],    11          \n\t"
629         "or             %[t2],    %[t0],    %[t1]       \n\t"
630         "or             %[t3],    %[t2],    %[t3]       \n\t"
631         "sra            %[t4],    %[t3],    16          \n\t"
632         "sh             %[t4],    0(%[dst])             \n\t"
633         "sh             %[t3],    2(%[dst])             \n\t"
634         "addiu          %[count], %[count], -2          \n\t"
635         "addiu          %[src],   %[src],   8           \n\t"
636         "b              2b                              \n\t"
637         " addiu         %[dst],   %[dst],   4           \n\t"
638     "1:                                                 \n\t"
639         ".set           pop                             \n\t"
640         : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),
641           [t16]"=&r"(t16), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),
642           [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),
643           [t7]"=&r"(t7), [t8]"=&r"(t8)
644         : [add_x10]"r"(add_x10), [add_x20]"r"(add_x20), [sa]"r"(sa)
645         : "memory", "hi", "lo"
646     );
647 
648     if (count == 1) {
649         SkPMColor c = *src++;
650         SkPMColorAssert(c);
651         if (c) {
652             *dst = SkSrcOver32To16(c, *dst);
653         }
654         dst += 1;
655     }
656 }
657 
/**
 *  Blends a row of 32-bit SkPMColor pixels, scaled by a constant alpha, onto
 *  RGB565 pixels using the MIPS DSP ASE.
 *
 *  The inline assembly handles two pixels per iteration while more than one
 *  pixel remains; a single leftover pixel is blended by the C code that
 *  follows the asm statement.
 *
 *  @param dst    destination 565 pixels, read and rewritten in place
 *  @param src    source 32-bit pixels
 *  @param count  number of pixels
 *  @param alpha  constant scale applied to the source
 *                (NOTE(review): presumably 0..255 - confirm with callers)
 *  The two trailing int parameters (x, y) are unnamed and unused.
 */
static void S32A_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,
                                     const SkPMColor* SK_RESTRICT src, int count,
                                     U8CPU alpha, int /*x*/, int /*y*/) {
    register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
    register uint32_t  s0, s1, s2, s3;
    // Scratch register for the asm below; bound as a read-write operand.
    register unsigned dst_scale = 0;

    __asm__ volatile (
        ".set            push                                       \n\t"
        ".set            noreorder                                  \n\t"
        // t0 = low byte of alpha in all four byte lanes;
        // t6 = 0x0080 and t7 = 0x00FF in both halfwords.
        "replv.qb        %[t0],        %[alpha]                     \n\t"
        "repl.ph         %[t6],        0x80                         \n\t"
        "repl.ph         %[t7],        0xFF                         \n\t"
    "1:                                                             \n\t"
        // Leave the loop once fewer than two pixels remain (count - 1 <= 0).
        "addiu           %[t8],        %[count],     -1             \n\t"
        "blez            %[t8],        2f                           \n\t"
        " nop                                                       \n\t"
        // Load two source pixels (t8, t9) and two destination pixels (t4, t5).
        "lw              %[t8],        0(%[src])                    \n\t"
        "lw              %[t9],        4(%[src])                    \n\t"
        "lh              %[t4],        0(%[dst])                    \n\t"
        "lh              %[t5],        2(%[dst])                    \n\t"
        "sll             %[t5],        %[t5],        16             \n\t"
        // Regroup the source bytes of both pixels into per-channel pairs.
        "sll             %[t2],        %[t8],        8              \n\t"
        "sll             %[t3],        %[t9],        8              \n\t"
        "precrq.qb.ph    %[t1],        %[t2],        %[t3]          \n\t"
        "precrq.qb.ph    %[t3],        %[t8],        %[t9]          \n\t"
        "preceu.ph.qbla  %[t8],        %[t3]                        \n\t"
        // s3 = (top byte of each source pixel) * alpha, one product per halfword.
        "muleu_s.ph.qbr  %[s3],        %[t0],        %[t8]          \n\t"
        "preceu.ph.qbla  %[t2],        %[t1]                        \n\t"
        "preceu.ph.qbra  %[t1],        %[t1]                        \n\t"
        "preceu.ph.qbra  %[t3],        %[t3]                        \n\t"
        // t9 = dst[0] in the upper halfword, dst[1] in the lower halfword;
        // s0/s1/s2 are its 5-, 6- and 5-bit fields.
        "packrl.ph       %[t9],        %[t4],        %[t5]          \n\t"
        "shra.ph         %[s0],        %[t9],        11             \n\t"
        "and             %[s0],        %[s0],        0x1F001F       \n\t"
        "shra.ph         %[s1],        %[t9],        5              \n\t"
        "and             %[s1],        %[s1],        0x3F003F       \n\t"
        "and             %[s2],        %[t9],        0x1F001F       \n\t"
        // dst_scale = 0xFF - ((s3 + 0x80 + ((s3 + 0x80) >> 8)) >> 8) per pixel,
        // then replicated so each pixel's scale fills a byte pair.
        "addq.ph         %[s3],        %[s3],        %[t6]          \n\t"
        "shra.ph         %[t5],        %[s3],        8              \n\t"
        "and             %[t5],        %[t5],        0xFF00FF       \n\t"
        "addq.ph         %[dst_scale], %[s3],        %[t5]          \n\t"
        "shra.ph         %[dst_scale], %[dst_scale], 8              \n\t"
        "subq_s.ph       %[dst_scale], %[t7],        %[dst_scale]   \n\t"
        "sll             %[dst_scale], %[dst_scale], 8              \n\t"
        "precrq.qb.ph    %[dst_scale], %[dst_scale], %[dst_scale]   \n\t"
        // Reduce the 8-bit source channels to 5/5/6 bits.
        "shrl.qb         %[t1],        %[t1],        3              \n\t"
        "shrl.qb         %[t2],        %[t2],        3              \n\t"
        "shrl.qb         %[t3],        %[t3],        2              \n\t"
        // source * alpha and destination * dst_scale, summed per channel.
        "muleu_s.ph.qbl  %[t1],        %[t0],        %[t1]          \n\t"
        "muleu_s.ph.qbl  %[t2],        %[t0],        %[t2]          \n\t"
        "muleu_s.ph.qbl  %[t3],        %[t0],        %[t3]          \n\t"
        "muleu_s.ph.qbl  %[t8],        %[dst_scale], %[s0]          \n\t"
        "muleu_s.ph.qbl  %[t9],        %[dst_scale], %[s2]          \n\t"
        "muleu_s.ph.qbl  %[t4],        %[dst_scale], %[s1]          \n\t"
        "addq.ph         %[t1],        %[t1],        %[t8]          \n\t"
        "addq.ph         %[t2],        %[t2],        %[t9]          \n\t"
        "addq.ph         %[t3],        %[t3],        %[t4]          \n\t"
        // Add 0x80, add the value shifted right by 8, and keep the high byte
        // of each halfword.
        "addq.ph         %[t8],        %[t1],        %[t6]          \n\t"
        "addq.ph         %[t9],        %[t2],        %[t6]          \n\t"
        "addq.ph         %[t4],        %[t3],        %[t6]          \n\t"
        "shra.ph         %[t1],        %[t8],        8              \n\t"
        "addq.ph         %[t1],        %[t1],        %[t8]          \n\t"
        "preceu.ph.qbla  %[t1],        %[t1]                        \n\t"
        "shra.ph         %[t2],        %[t9],        8              \n\t"
        "addq.ph         %[t2],        %[t2],        %[t9]          \n\t"
        "preceu.ph.qbla  %[t2],        %[t2]                        \n\t"
        "shra.ph         %[t3],        %[t4],        8              \n\t"
        "addq.ph         %[t3],        %[t3],        %[t4]          \n\t"
        "preceu.ph.qbla  %[t3],        %[t3]                        \n\t"
        // Repack the channels into two 565 pixels and store them:
        // upper halfword to dst[0], lower halfword to dst[1].
        "shll.ph         %[t8],        %[t1],        11             \n\t"
        "shll.ph         %[t9],        %[t3],        5              \n\t"
        "or              %[t8],        %[t8],        %[t9]          \n\t"
        "or              %[s0],        %[t8],        %[t2]          \n\t"
        "srl             %[t8],        %[s0],        16             \n\t"
        "and             %[t9],        %[s0],        0xFFFF         \n\t"
        "sh              %[t8],        0(%[dst])                    \n\t"
        "sh              %[t9],        2(%[dst])                    \n\t"
        "addiu           %[src],       %[src],       8              \n\t"
        "addiu           %[count],     %[count],     -2             \n\t"
        "b               1b                                         \n\t"
        " addiu          %[dst],       %[dst],       4              \n\t"
    "2:                                                             \n\t"
        ".set            pop                                        \n\t"
        : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
          [dst_scale]"+r"(dst_scale), [s0]"=&r"(s0), [s1]"=&r"(s1),
          [s2]"=&r"(s2), [s3]"=&r"(s3), [t0]"=&r"(t0), [t1]"=&r"(t1),
          [t2]"=&r"(t2), [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5),
          [t6]"=&r"(t6), [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9)
        : [alpha]"r"(alpha)
        : "memory", "hi", "lo"
    );

    // Blend a single remaining pixel in C; a zero source pixel leaves the
    // destination untouched.
    if (count == 1) {
        SkPMColor sc = *src++;
        SkPMColorAssert(sc);
        if (sc) {
            uint16_t dc = *dst;
            // NOTE(review): this local shadows the asm scratch variable
            // dst_scale declared at the top of the function.
            unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha);
            unsigned dr = (SkPacked32ToR16(sc) * alpha) + (SkGetPackedR16(dc) * dst_scale);
            unsigned dg = (SkPacked32ToG16(sc) * alpha) + (SkGetPackedG16(dc) * dst_scale);
            unsigned db = (SkPacked32ToB16(sc) * alpha) + (SkGetPackedB16(dc) * dst_scale);
            *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db));
        }
        dst += 1;
    }
}
764 
S32_Blend_BlitRow32_mips_dsp(SkPMColor * SK_RESTRICT dst,const SkPMColor * SK_RESTRICT src,int count,U8CPU alpha)765 static void S32_Blend_BlitRow32_mips_dsp(SkPMColor* SK_RESTRICT dst,
766                                          const SkPMColor* SK_RESTRICT src,
767                                          int count, U8CPU alpha) {
768     register int32_t t0, t1, t2, t3, t4, t5, t6, t7;
769 
770     __asm__ volatile (
771         ".set            push                         \n\t"
772         ".set            noreorder                    \n\t"
773         "li              %[t2],    0x100              \n\t"
774         "addiu           %[t0],    %[alpha], 1        \n\t"
775         "subu            %[t1],    %[t2],    %[t0]    \n\t"
776         "replv.qb        %[t7],    %[t0]              \n\t"
777         "replv.qb        %[t6],    %[t1]              \n\t"
778     "1:                                               \n\t"
779         "blez            %[count], 2f                 \n\t"
780         "lw              %[t0],    0(%[src])          \n\t"
781         "lw              %[t1],    0(%[dst])          \n\t"
782         "preceu.ph.qbr   %[t2],    %[t0]              \n\t"
783         "preceu.ph.qbl   %[t3],    %[t0]              \n\t"
784         "preceu.ph.qbr   %[t4],    %[t1]              \n\t"
785         "preceu.ph.qbl   %[t5],    %[t1]              \n\t"
786         "muleu_s.ph.qbr  %[t2],    %[t7],    %[t2]    \n\t"
787         "muleu_s.ph.qbr  %[t3],    %[t7],    %[t3]    \n\t"
788         "muleu_s.ph.qbr  %[t4],    %[t6],    %[t4]    \n\t"
789         "muleu_s.ph.qbr  %[t5],    %[t6],    %[t5]    \n\t"
790         "addiu           %[src],   %[src],   4        \n\t"
791         "addiu           %[count], %[count], -1       \n\t"
792         "precrq.qb.ph    %[t0],    %[t3],    %[t2]    \n\t"
793         "precrq.qb.ph    %[t2],    %[t5],    %[t4]    \n\t"
794         "addu            %[t1],    %[t0],    %[t2]    \n\t"
795         "sw              %[t1],    0(%[dst])          \n\t"
796         "b               1b                           \n\t"
797         " addi           %[dst],   %[dst],   4        \n\t"
798     "2:                                               \n\t"
799         ".set            pop                          \n\t"
800         : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
801           [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
802           [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
803         : [alpha]"r"(alpha)
804         : "memory", "hi", "lo"
805     );
806 }
807 
/**
 *  Blends one expanded color into a rectangle of RGB565 pixels through an
 *  8-bit mask, using MIPS DSP inline assembly.
 *
 *  For every pixel the code computes
 *      d     = (pixel replicated into both halfwords) & 0x07E0F81F
 *      scale = (mask byte + 1) >> 3
 *      r     = (d + (((expanded32 - d) * scale) >> 5)) & 0x07E0F81F
 *  and stores the low 16 bits of (r | (r >> 16)) back to the pixel.
 *
 *  Each row handles four pixels per iteration while at least four remain and
 *  then one pixel at a time. After a row, deviceRB is added to `device` and
 *  maskRB to `alpha`; since both pointers have already been stepped past the
 *  row's pixels, these are presumably the row strides minus the bytes
 *  consumed per row - confirm against the caller.
 *
 *  The row loop tests `height` at the bottom, so one row is processed even
 *  when height <= 0 on entry.
 *  NOTE(review): presumably callers guarantee height > 0 and width >= 0.
 *
 *  @param width       pixels per row
 *  @param height      number of rows
 *  @param device      first destination 565 pixel
 *  @param deviceRB    byte offset added to `device` after each row
 *  @param alpha       first mask byte
 *  @param expanded32  color in the same 0x07E0F81F-masked layout as `d`
 *                     (NOTE(review): layout inferred from the subtraction
 *                     against d - confirm)
 *  @param maskRB      byte offset added to `alpha` after each row
 */
void blitmask_d565_opaque_mips(int width, int height, uint16_t* device,
                               unsigned deviceRB, const uint8_t* alpha,
                               uint32_t expanded32, unsigned maskRB) {
    register uint32_t s0, s1, s2, s3;

    // $t0-$t9 are used by name inside the asm and are listed as clobbers.
    __asm__ volatile (
        ".set            push                                    \n\t"
        ".set            noreorder                               \n\t"
        ".set            noat                                    \n\t"
        // $t9 = field mask for the expanded layout.
        "li              $t9,       0x7E0F81F                    \n\t"
    "1:                                                          \n\t"
        // Start of a row: $t8 counts the pixels left in it.
        "move            $t8,       %[width]                     \n\t"
        "addiu           %[height], %[height],     -1            \n\t"
    "2:                                                          \n\t"
        // Row finished -> 4; fewer than four pixels left -> 3.
        "beqz            $t8,       4f                           \n\t"
        " addiu          $t0,       $t8,           -4            \n\t"
        "bltz            $t0,       3f                           \n\t"
        " nop                                                    \n\t"
        // Four-pixel path.
        "addiu           $t8,       $t8,           -4            \n\t"
        "lhu             $t0,       0(%[device])                 \n\t"
        "lhu             $t1,       2(%[device])                 \n\t"
        "lhu             $t2,       4(%[device])                 \n\t"
        "lhu             $t3,       6(%[device])                 \n\t"
        "lbu             $t4,       0(%[alpha])                  \n\t"
        "lbu             $t5,       1(%[alpha])                  \n\t"
        "lbu             $t6,       2(%[alpha])                  \n\t"
        "lbu             $t7,       3(%[alpha])                  \n\t"
        "replv.ph        $t0,       $t0                          \n\t"
        "replv.ph        $t1,       $t1                          \n\t"
        "replv.ph        $t2,       $t2                          \n\t"
        "replv.ph        $t3,       $t3                          \n\t"
        // scale = (mask + 1) >> 3
        "addiu           %[s0],     $t4,           1             \n\t"
        "addiu           %[s1],     $t5,           1             \n\t"
        "addiu           %[s2],     $t6,           1             \n\t"
        "addiu           %[s3],     $t7,           1             \n\t"
        "srl             %[s0],     %[s0],         3             \n\t"
        "srl             %[s1],     %[s1],         3             \n\t"
        "srl             %[s2],     %[s2],         3             \n\t"
        "srl             %[s3],     %[s3],         3             \n\t"
        "and             $t0,       $t0,           $t9           \n\t"
        "and             $t1,       $t1,           $t9           \n\t"
        "and             $t2,       $t2,           $t9           \n\t"
        "and             $t3,       $t3,           $t9           \n\t"
        // d + (((expanded32 - d) * scale) >> 5), masked back to the fields
        "subu            $t4,       %[expanded32], $t0           \n\t"
        "subu            $t5,       %[expanded32], $t1           \n\t"
        "subu            $t6,       %[expanded32], $t2           \n\t"
        "subu            $t7,       %[expanded32], $t3           \n\t"
        "mul             $t4,       $t4,           %[s0]         \n\t"
        "mul             $t5,       $t5,           %[s1]         \n\t"
        "mul             $t6,       $t6,           %[s2]         \n\t"
        "mul             $t7,       $t7,           %[s3]         \n\t"
        "addiu           %[alpha],  %[alpha],      4             \n\t"
        "srl             $t4,       $t4,           5             \n\t"
        "srl             $t5,       $t5,           5             \n\t"
        "srl             $t6,       $t6,           5             \n\t"
        "srl             $t7,       $t7,           5             \n\t"
        "addu            $t4,       $t0,           $t4           \n\t"
        "addu            $t5,       $t1,           $t5           \n\t"
        "addu            $t6,       $t2,           $t6           \n\t"
        "addu            $t7,       $t3,           $t7           \n\t"
        "and             $t4,       $t4,           $t9           \n\t"
        "and             $t5,       $t5,           $t9           \n\t"
        "and             $t6,       $t6,           $t9           \n\t"
        "and             $t7,       $t7,           $t9           \n\t"
        // Fold the upper halfword back onto the lower one and store 16 bits.
        "srl             $t0,       $t4,           16            \n\t"
        "srl             $t1,       $t5,           16            \n\t"
        "srl             $t2,       $t6,           16            \n\t"
        "srl             $t3,       $t7,           16            \n\t"
        "or              %[s0],     $t0,           $t4           \n\t"
        "or              %[s1],     $t1,           $t5           \n\t"
        "or              %[s2],     $t2,           $t6           \n\t"
        "or              %[s3],     $t3,           $t7           \n\t"
        "sh              %[s0],     0(%[device])                 \n\t"
        "sh              %[s1],     2(%[device])                 \n\t"
        "sh              %[s2],     4(%[device])                 \n\t"
        "sh              %[s3],     6(%[device])                 \n\t"
        "b               2b                                      \n\t"
        " addiu          %[device], %[device],     8             \n\t"
    "3:                                                          \n\t"
        // Single-pixel path: same arithmetic, repeated until $t8 reaches 0.
        "lhu             $t0,       0(%[device])                 \n\t"
        "lbu             $t1,       0(%[alpha])                  \n\t"
        "addiu           $t8,       $t8,           -1            \n\t"
        "replv.ph        $t2,       $t0                          \n\t"
        "and             $t2,       $t2,           $t9           \n\t"
        "addiu           $t0,       $t1,           1             \n\t"
        "srl             $t0,       $t0,           3             \n\t"
        "subu            $t3,       %[expanded32], $t2           \n\t"
        "mul             $t3,       $t3,           $t0           \n\t"
        "addiu           %[alpha],  %[alpha],      1             \n\t"
        "srl             $t3,       $t3,           5             \n\t"
        "addu            $t3,       $t2,           $t3           \n\t"
        "and             $t3,       $t3,           $t9           \n\t"
        "srl             $t4,       $t3,           16            \n\t"
        "or              %[s0],     $t4,           $t3           \n\t"
        "sh              %[s0],     0(%[device])                 \n\t"
        "bnez            $t8,       3b                           \n\t"
         "addiu          %[device], %[device],     2             \n\t"
    "4:                                                          \n\t"
        // End of row: apply the per-row offsets (the alpha update sits in the
        // branch delay slot, so it runs on the last row as well).
        "addu            %[device], %[device],     %[deviceRB]   \n\t"
        "bgtz            %[height], 1b                           \n\t"
        " addu           %[alpha],  %[alpha],      %[maskRB]     \n\t"
        ".set            pop                                     \n\t"
        : [height]"+r"(height), [alpha]"+r"(alpha), [device]"+r"(device),
          [deviceRB]"+r"(deviceRB), [maskRB]"+r"(maskRB), [s0]"=&r"(s0),
          [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3)
        : [expanded32] "r" (expanded32), [width] "r" (width)
        : "memory", "hi", "lo", "t0", "t1", "t2", "t3",
          "t4", "t5", "t6", "t7", "t8", "t9"
    );
}
918 
919 ///////////////////////////////////////////////////////////////////////////////////////////////////
920 
// Row procs for RGB565 destinations. SkBlitRow::PlatformFactory565() uses its
// `flags` argument directly as the index into this table, so the order of the
// entries is significant. A nullptr slot means this file supplies no MIPS DSP
// routine for that flag combination.
const SkBlitRow::Proc16 platform_565_procs_mips_dsp[] = {
    // no dither
    nullptr,                            // index 0: no routine
    S32_D565_Blend_mips_dsp,            // index 1
    S32A_D565_Opaque_mips_dsp,          // index 2
    S32A_D565_Blend_mips_dsp,           // index 3

    // dither
    S32_D565_Opaque_Dither_mips_dsp,    // index 4
    S32_D565_Blend_Dither_mips_dsp,     // index 5
    S32A_D565_Opaque_Dither_mips_dsp,   // index 6
    nullptr,                            // index 7: no routine
};
934 
// Row procs for 32-bit destinations. SkBlitRow::PlatformProcs32() uses its
// `flags` argument directly as the index into this table. Only the S32_Blend
// slot has a MIPS DSP routine; the nullptr slots have none.
static const SkBlitRow::Proc32 platform_32_procs_mips_dsp[] = {
    nullptr,   // S32_Opaque,
    S32_Blend_BlitRow32_mips_dsp,   // S32_Blend,
    nullptr,   // S32A_Opaque,
    nullptr,   // S32A_Blend,
};
941 
PlatformFactory565(unsigned flags)942 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) {
943     return platform_565_procs_mips_dsp[flags];
944 }
945 
PlatformColorFactory565(unsigned flags)946 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) {
947     return nullptr;
948 }
949 
PlatformProcs32(unsigned flags)950 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
951     return platform_32_procs_mips_dsp[flags];
952 }
953