// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transform methods for lossless encoder.
//
// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
14
15 #include "./dsp.h"
16
17 #if defined(WEBP_USE_MIPS_DSP_R2)
18
19 #include "./lossless.h"
20
SubtractGreenFromBlueAndRed(uint32_t * argb_data,int num_pixels)21 static void SubtractGreenFromBlueAndRed(uint32_t* argb_data,
22 int num_pixels) {
23 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
24 uint32_t* const p_loop1_end = argb_data + (num_pixels & ~3);
25 uint32_t* const p_loop2_end = p_loop1_end + (num_pixels & 3);
26 __asm__ volatile (
27 ".set push \n\t"
28 ".set noreorder \n\t"
29 "beq %[argb_data], %[p_loop1_end], 3f \n\t"
30 " nop \n\t"
31 "0: \n\t"
32 "lw %[temp0], 0(%[argb_data]) \n\t"
33 "lw %[temp1], 4(%[argb_data]) \n\t"
34 "lw %[temp2], 8(%[argb_data]) \n\t"
35 "lw %[temp3], 12(%[argb_data]) \n\t"
36 "ext %[temp4], %[temp0], 8, 8 \n\t"
37 "ext %[temp5], %[temp1], 8, 8 \n\t"
38 "ext %[temp6], %[temp2], 8, 8 \n\t"
39 "ext %[temp7], %[temp3], 8, 8 \n\t"
40 "addiu %[argb_data], %[argb_data], 16 \n\t"
41 "replv.ph %[temp4], %[temp4] \n\t"
42 "replv.ph %[temp5], %[temp5] \n\t"
43 "replv.ph %[temp6], %[temp6] \n\t"
44 "replv.ph %[temp7], %[temp7] \n\t"
45 "subu.qb %[temp0], %[temp0], %[temp4] \n\t"
46 "subu.qb %[temp1], %[temp1], %[temp5] \n\t"
47 "subu.qb %[temp2], %[temp2], %[temp6] \n\t"
48 "subu.qb %[temp3], %[temp3], %[temp7] \n\t"
49 "sw %[temp0], -16(%[argb_data]) \n\t"
50 "sw %[temp1], -12(%[argb_data]) \n\t"
51 "sw %[temp2], -8(%[argb_data]) \n\t"
52 "bne %[argb_data], %[p_loop1_end], 0b \n\t"
53 " sw %[temp3], -4(%[argb_data]) \n\t"
54 "3: \n\t"
55 "beq %[argb_data], %[p_loop2_end], 2f \n\t"
56 " nop \n\t"
57 "1: \n\t"
58 "lw %[temp0], 0(%[argb_data]) \n\t"
59 "addiu %[argb_data], %[argb_data], 4 \n\t"
60 "ext %[temp4], %[temp0], 8, 8 \n\t"
61 "replv.ph %[temp4], %[temp4] \n\t"
62 "subu.qb %[temp0], %[temp0], %[temp4] \n\t"
63 "bne %[argb_data], %[p_loop2_end], 1b \n\t"
64 " sw %[temp0], -4(%[argb_data]) \n\t"
65 "2: \n\t"
66 ".set pop \n\t"
67 : [argb_data]"+&r"(argb_data), [temp0]"=&r"(temp0),
68 [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
69 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
70 [temp7]"=&r"(temp7)
71 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
72 : "memory"
73 );
74 }
75
// Returns the signed product 'color_pred * color' shifted right by 5 bits.
// The product is converted to uint32_t before the shift, so the shift is a
// logical one: for a negative product the top five bits of the result are
// zero rather than sign bits. Callers in this file only keep the low 8 bits
// of the value.
static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
                                                int8_t color) {
  return (uint32_t)((int)(color_pred) * color) >> 5;
}
80
TransformColor(const VP8LMultipliers * const m,uint32_t * data,int num_pixels)81 static void TransformColor(const VP8LMultipliers* const m, uint32_t* data,
82 int num_pixels) {
83 int temp0, temp1, temp2, temp3, temp4, temp5;
84 uint32_t argb, argb1, new_red, new_red1;
85 const uint32_t G_to_R = m->green_to_red_;
86 const uint32_t G_to_B = m->green_to_blue_;
87 const uint32_t R_to_B = m->red_to_blue_;
88 uint32_t* const p_loop_end = data + (num_pixels & ~1);
89 __asm__ volatile (
90 ".set push \n\t"
91 ".set noreorder \n\t"
92 "beq %[data], %[p_loop_end], 1f \n\t"
93 " nop \n\t"
94 "replv.ph %[temp0], %[G_to_R] \n\t"
95 "replv.ph %[temp1], %[G_to_B] \n\t"
96 "replv.ph %[temp2], %[R_to_B] \n\t"
97 "shll.ph %[temp0], %[temp0], 8 \n\t"
98 "shll.ph %[temp1], %[temp1], 8 \n\t"
99 "shll.ph %[temp2], %[temp2], 8 \n\t"
100 "shra.ph %[temp0], %[temp0], 8 \n\t"
101 "shra.ph %[temp1], %[temp1], 8 \n\t"
102 "shra.ph %[temp2], %[temp2], 8 \n\t"
103 "0: \n\t"
104 "lw %[argb], 0(%[data]) \n\t"
105 "lw %[argb1], 4(%[data]) \n\t"
106 "lhu %[new_red], 2(%[data]) \n\t"
107 "lhu %[new_red1], 6(%[data]) \n\t"
108 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
109 "precr.qb.ph %[temp4], %[argb], %[argb1] \n\t"
110 "preceu.ph.qbra %[temp3], %[temp3] \n\t"
111 "preceu.ph.qbla %[temp4], %[temp4] \n\t"
112 "shll.ph %[temp3], %[temp3], 8 \n\t"
113 "shll.ph %[temp4], %[temp4], 8 \n\t"
114 "shra.ph %[temp3], %[temp3], 8 \n\t"
115 "shra.ph %[temp4], %[temp4], 8 \n\t"
116 "mul.ph %[temp5], %[temp3], %[temp0] \n\t"
117 "mul.ph %[temp3], %[temp3], %[temp1] \n\t"
118 "mul.ph %[temp4], %[temp4], %[temp2] \n\t"
119 "addiu %[data], %[data], 8 \n\t"
120 "ins %[new_red1], %[new_red], 16, 16 \n\t"
121 "ins %[argb1], %[argb], 16, 16 \n\t"
122 "shra.ph %[temp5], %[temp5], 5 \n\t"
123 "shra.ph %[temp3], %[temp3], 5 \n\t"
124 "shra.ph %[temp4], %[temp4], 5 \n\t"
125 "subu.ph %[new_red1], %[new_red1], %[temp5] \n\t"
126 "subu.ph %[argb1], %[argb1], %[temp3] \n\t"
127 "preceu.ph.qbra %[temp5], %[new_red1] \n\t"
128 "subu.ph %[argb1], %[argb1], %[temp4] \n\t"
129 "preceu.ph.qbra %[temp3], %[argb1] \n\t"
130 "sb %[temp5], -2(%[data]) \n\t"
131 "sb %[temp3], -4(%[data]) \n\t"
132 "sra %[temp5], %[temp5], 16 \n\t"
133 "sra %[temp3], %[temp3], 16 \n\t"
134 "sb %[temp5], -6(%[data]) \n\t"
135 "bne %[data], %[p_loop_end], 0b \n\t"
136 " sb %[temp3], -8(%[data]) \n\t"
137 "1: \n\t"
138 ".set pop \n\t"
139 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
140 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
141 [new_red1]"=&r"(new_red1), [new_red]"=&r"(new_red),
142 [argb]"=&r"(argb), [argb1]"=&r"(argb1), [data]"+&r"(data)
143 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
144 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
145 : "memory", "hi", "lo"
146 );
147
148 if (num_pixels & 1) {
149 const uint32_t argb_ = data[0];
150 const uint32_t green = argb_ >> 8;
151 const uint32_t red = argb_ >> 16;
152 uint32_t new_blue = argb_;
153 new_red = red;
154 new_red -= ColorTransformDelta(m->green_to_red_, green);
155 new_red &= 0xff;
156 new_blue -= ColorTransformDelta(m->green_to_blue_, green);
157 new_blue -= ColorTransformDelta(m->red_to_blue_, red);
158 new_blue &= 0xff;
159 data[0] = (argb_ & 0xff00ff00u) | (new_red << 16) | (new_blue);
160 }
161 }
162
TransformColorBlue(uint8_t green_to_blue,uint8_t red_to_blue,uint32_t argb)163 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
164 uint8_t red_to_blue,
165 uint32_t argb) {
166 const uint32_t green = argb >> 8;
167 const uint32_t red = argb >> 16;
168 uint8_t new_blue = argb;
169 new_blue -= ColorTransformDelta(green_to_blue, green);
170 new_blue -= ColorTransformDelta(red_to_blue, red);
171 return (new_blue & 0xff);
172 }
173
// Accumulates into 'histo' the transformed blue value of every pixel of a
// tile: histo[v] is incremented once per pixel whose transformed blue is v.
//   argb:          first pixel of the tile's first row.
//   stride:        distance, in pixels, between consecutive rows.
//   tile_width,
//   tile_height:   tile dimensions in pixels.
//   green_to_blue,
//   red_to_blue:   multipliers of the transform.
//   histo:         counters indexed by an 8-bit value; updated in place.
// Pixels are processed two at a time in assembly; the last pixel of an
// odd-width row goes through TransformColorBlue().
// NOTE(review): the assembly multiplies by the low 16 bits of each
// multiplier while the scalar path narrows it to 8 bits, so both paths
// presumably agree only for multipliers in [-128, 127] -- confirm callers.
static void CollectColorBlueTransforms(const uint32_t* argb, int stride,
                                       int tile_width, int tile_height,
                                       int green_to_blue, int red_to_blue,
                                       int histo[]) {
  // Pack each multiplier into both halfwords of a word. This is done with
  // unsigned arithmetic because left-shifting a negative int is undefined
  // behavior.
  const uint32_t rtb =
      ((uint32_t)red_to_blue << 16) | ((uint32_t)red_to_blue & 0xffff);
  const uint32_t gtb =
      ((uint32_t)green_to_blue << 16) | ((uint32_t)green_to_blue & 0xffff);
  const uint32_t mask = 0xff00ffu;  // low byte of each halfword
  while (tile_height-- > 0) {
    int x;
    const uint32_t* p_argb = argb;
    argb += stride;
    for (x = 0; x < (tile_width >> 1); ++x) {
      int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
      __asm__ volatile (
        "lw           %[temp0],  0(%[p_argb])             \n\t"
        "lw           %[temp1],  4(%[p_argb])             \n\t"
        "precr.qb.ph  %[temp2],  %[temp0],  %[temp1]      \n\t"
        "ins          %[temp1],  %[temp0],  16,    16     \n\t"
        "shra.ph      %[temp2],  %[temp2],  8             \n\t"
        "shra.ph      %[temp3],  %[temp1],  8             \n\t"
        "mul.ph       %[temp5],  %[temp2],  %[rtb]        \n\t"
        "mul.ph       %[temp6],  %[temp3],  %[gtb]        \n\t"
        "and          %[temp4],  %[temp1],  %[mask]       \n\t"
        "addiu        %[p_argb], %[p_argb], 8             \n\t"
        "shra.ph      %[temp5],  %[temp5],  5             \n\t"
        "shra.ph      %[temp6],  %[temp6],  5             \n\t"
        "subu.qb      %[temp2],  %[temp4],  %[temp5]      \n\t"
        "subu.qb      %[temp2],  %[temp2],  %[temp6]      \n\t"
        : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
          [temp5]"=&r"(temp5), [temp6]"=&r"(temp6)
        : [rtb]"r"(rtb), [gtb]"r"(gtb), [mask]"r"(mask)
        : "memory", "hi", "lo"
      );
      // The low byte of each halfword of temp2 is one pixel's result.
      ++histo[(uint8_t)(temp2 >> 16)];
      ++histo[(uint8_t)temp2];
    }
    if (tile_width & 1) {
      ++histo[TransformColorBlue(green_to_blue, red_to_blue, *p_argb)];
    }
  }
}
216
TransformColorRed(uint8_t green_to_red,uint32_t argb)217 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
218 uint32_t argb) {
219 const uint32_t green = argb >> 8;
220 uint32_t new_red = argb >> 16;
221 new_red -= ColorTransformDelta(green_to_red, green);
222 return (new_red & 0xff);
223 }
224
// Accumulates into 'histo' the transformed red value of every pixel of a
// tile: histo[v] is incremented once per pixel whose transformed red is v.
//   argb:          first pixel of the tile's first row.
//   stride:        distance, in pixels, between consecutive rows.
//   tile_width,
//   tile_height:   tile dimensions in pixels.
//   green_to_red:  multiplier of the transform.
//   histo:         counters indexed by an 8-bit value; updated in place.
// Pixels are processed two at a time in assembly; the last pixel of an
// odd-width row goes through TransformColorRed().
// NOTE(review): the assembly multiplies by the low 16 bits of
// 'green_to_red' while the scalar path narrows it to 8 bits, so both paths
// presumably agree only for multipliers in [-128, 127] -- confirm callers.
static void CollectColorRedTransforms(const uint32_t* argb, int stride,
                                      int tile_width, int tile_height,
                                      int green_to_red, int histo[]) {
  // Pack the multiplier into both halfwords of a word. This is done with
  // unsigned arithmetic because left-shifting a negative int is undefined
  // behavior.
  const uint32_t gtr =
      ((uint32_t)green_to_red << 16) | ((uint32_t)green_to_red & 0xffff);
  while (tile_height-- > 0) {
    int x;
    const uint32_t* p_argb = argb;
    argb += stride;
    for (x = 0; x < (tile_width >> 1); ++x) {
      int temp0, temp1, temp2, temp3, temp4;
      __asm__ volatile (
        "lw           %[temp0],  0(%[p_argb])             \n\t"
        "lw           %[temp1],  4(%[p_argb])             \n\t"
        "precrq.ph.w  %[temp4],  %[temp0],  %[temp1]      \n\t"
        "ins          %[temp1],  %[temp0],  16,    16     \n\t"
        "shra.ph      %[temp3],  %[temp1],  8             \n\t"
        "mul.ph       %[temp2],  %[temp3],  %[gtr]        \n\t"
        "addiu        %[p_argb], %[p_argb], 8             \n\t"
        "shra.ph      %[temp2],  %[temp2],  5             \n\t"
        "subu.qb      %[temp2],  %[temp4],  %[temp2]      \n\t"
        : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
        : [gtr]"r"(gtr)
        : "memory", "hi", "lo"
      );
      // The low byte of each halfword of temp2 is one pixel's result.
      ++histo[(uint8_t)(temp2 >> 16)];
      ++histo[(uint8_t)temp2];
    }
    if (tile_width & 1) {
      ++histo[TransformColorRed(green_to_red, *p_argb)];
    }
  }
}
258
259 //------------------------------------------------------------------------------
260 // Entry point
261
262 extern void VP8LEncDspInitMIPSdspR2(void);
263
VP8LEncDspInitMIPSdspR2(void)264 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPSdspR2(void) {
265 VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
266 VP8LTransformColor = TransformColor;
267 VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
268 VP8LCollectColorRedTransforms = CollectColorRedTransforms;
269 }
270
271 #else // !WEBP_USE_MIPS_DSP_R2
272
273 WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPSdspR2)
274
275 #endif // WEBP_USE_MIPS_DSP_R2
276