1 // Copyright 2013 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Utilities for processing transparent channel.
11 //
12 // Author: Skal (pascal.massimino@gmail.com)
13
14 #include <assert.h>
15 #include "./dsp.h"
16
// Tables can be faster on some platforms but incur some extra binary size (~2k).
18 // #define USE_TABLES_FOR_ALPHA_MULT
19
20 // -----------------------------------------------------------------------------
21
// Scale factors below are MFIX-bit fixed-point values.
#define MFIX 24    // 24bit fixed-point arithmetic
#define HALF ((1u << MFIX) >> 1)        // 0.5 in fixed point (rounding term)
#define KINV_255 ((1u << MFIX) / 255u)  // (1 << MFIX) / 255, truncated

// Multiplies the 8-bit sample 'x' by the fixed-point factor 'mult' and
// returns the product rounded back to an integer (HALF is added before the
// MFIX-bit right shift). The arithmetic is done on 32-bit unsigned values.
static uint32_t Mult(uint8_t x, uint32_t mult) {
  const uint32_t product = x * mult;
  const uint32_t rounded = (product + HALF) >> MFIX;
  assert(rounded <= 255);  // <- 24bit precision is enough to ensure that.
  return rounded;
}
31
32 #ifdef USE_TABLES_FOR_ALPHA_MULT
33
// Precomputed fixed-point scale factors, indexed as kMultTables[which][alpha]:
//   kMultTables[0][alpha] = (255u << MFIX) / alpha   (entry 0 is stored as 0)
//   kMultTables[1][alpha] = alpha * KINV_255
static const uint32_t kMultTables[2][256] = {
  {  // (255u << MFIX) / alpha
    0x00000000, 0xff000000, 0x7f800000, 0x55000000,
    0x3fc00000, 0x33000000, 0x2a800000, 0x246db6db,
    0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
    0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000,
    0x0ff00000, 0x0f000000, 0x0e2aaaaa, 0x0d6bca1a,
    0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
    0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c,
    0x091b6db6, 0x08cb08d3, 0x08800000, 0x0839ce73,
    0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
    0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d,
    0x06600000, 0x063831f3, 0x06124924, 0x05ee23b8,
    0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
    0x05500000, 0x05343eb1, 0x05199999, 0x05000000,
    0x04e76276, 0x04cfb2b7, 0x04b8e38e, 0x04a2e8ba,
    0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
    0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3,
    0x03fc0000, 0x03ec4ec4, 0x03dd1745, 0x03ce540f,
    0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
    0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666,
    0x035af286, 0x034fcace, 0x0344ec4e, 0x033a5440,
    0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
    0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846,
    0x02e5d174, 0x02dd7baf, 0x02d55555, 0x02cd5cd5,
    0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
    0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9,
    0x028ccccc, 0x0286562d, 0x02800000, 0x0279c952,
    0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
    0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac,
    0x0246db6d, 0x0241b2f9, 0x023ca1af, 0x0237a6f4,
    0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
    0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51,
    0x020e739c, 0x020a3d70, 0x02061861, 0x02020408,
    0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
    0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38,
    0x01e00000, 0x01dc7f10, 0x01d90b21, 0x01d5a3e9,
    0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
    0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5,
    0x01b914c1, 0x01b61eed, 0x01b33333, 0x01b05160,
    0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
    0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7,
    0x01980000, 0x01957741, 0x0192f684, 0x01907da4,
    0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
    0x01849249, 0x018245ae, 0x01800000, 0x017dc11f,
    0x017b88ee, 0x0179574e, 0x01772c23, 0x01750750,
    0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
    0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7,
    0x0162c859, 0x0160dd67, 0x015ef7bd, 0x015d1745,
    0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
    0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec,
    0x014d0fac, 0x014b5edc, 0x0149b26c, 0x01480a4a,
    0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
    0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0,
    0x0139d89d, 0x01385830, 0x0136db6d, 0x01356246,
    0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
    0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a,
    0x0128ba2e, 0x01276276, 0x01260dd6, 0x0124bc44,
    0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
    0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef,
    0x0119611a, 0x01182bf2, 0x0116f96f, 0x0115c988,
    0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
    0x01100000, 0x010edf12, 0x010dc087, 0x010ca458,
    0x010b8a7d, 0x010a72f0, 0x01095da8, 0x01084a9f,
    0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
    0x01030c30, 0x01020612, 0x01010204, 0x01000000
  },
  {  // alpha * KINV_255
    0x00000000, 0x00010101, 0x00020202, 0x00030303,
    0x00040404, 0x00050505, 0x00060606, 0x00070707,
    0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
    0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f,
    0x00101010, 0x00111111, 0x00121212, 0x00131313,
    0x00141414, 0x00151515, 0x00161616, 0x00171717,
    0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b,
    0x001c1c1c, 0x001d1d1d, 0x001e1e1e, 0x001f1f1f,
    0x00202020, 0x00212121, 0x00222222, 0x00232323,
    0x00242424, 0x00252525, 0x00262626, 0x00272727,
    0x00282828, 0x00292929, 0x002a2a2a, 0x002b2b2b,
    0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
    0x00303030, 0x00313131, 0x00323232, 0x00333333,
    0x00343434, 0x00353535, 0x00363636, 0x00373737,
    0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
    0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f,
    0x00404040, 0x00414141, 0x00424242, 0x00434343,
    0x00444444, 0x00454545, 0x00464646, 0x00474747,
    0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b,
    0x004c4c4c, 0x004d4d4d, 0x004e4e4e, 0x004f4f4f,
    0x00505050, 0x00515151, 0x00525252, 0x00535353,
    0x00545454, 0x00555555, 0x00565656, 0x00575757,
    0x00585858, 0x00595959, 0x005a5a5a, 0x005b5b5b,
    0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
    0x00606060, 0x00616161, 0x00626262, 0x00636363,
    0x00646464, 0x00656565, 0x00666666, 0x00676767,
    0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
    0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f,
    0x00707070, 0x00717171, 0x00727272, 0x00737373,
    0x00747474, 0x00757575, 0x00767676, 0x00777777,
    0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b,
    0x007c7c7c, 0x007d7d7d, 0x007e7e7e, 0x007f7f7f,
    0x00808080, 0x00818181, 0x00828282, 0x00838383,
    0x00848484, 0x00858585, 0x00868686, 0x00878787,
    0x00888888, 0x00898989, 0x008a8a8a, 0x008b8b8b,
    0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
    0x00909090, 0x00919191, 0x00929292, 0x00939393,
    0x00949494, 0x00959595, 0x00969696, 0x00979797,
    0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
    0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f,
    0x00a0a0a0, 0x00a1a1a1, 0x00a2a2a2, 0x00a3a3a3,
    0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
    0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab,
    0x00acacac, 0x00adadad, 0x00aeaeae, 0x00afafaf,
    0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
    0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7,
    0x00b8b8b8, 0x00b9b9b9, 0x00bababa, 0x00bbbbbb,
    0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
    0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3,
    0x00c4c4c4, 0x00c5c5c5, 0x00c6c6c6, 0x00c7c7c7,
    0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
    0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf,
    0x00d0d0d0, 0x00d1d1d1, 0x00d2d2d2, 0x00d3d3d3,
    0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
    0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb,
    0x00dcdcdc, 0x00dddddd, 0x00dedede, 0x00dfdfdf,
    0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
    0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7,
    0x00e8e8e8, 0x00e9e9e9, 0x00eaeaea, 0x00ebebeb,
    0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
    0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3,
    0x00f4f4f4, 0x00f5f5f5, 0x00f6f6f6, 0x00f7f7f7,
    0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
    0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff
  }
};
124
GetScale(uint32_t a,int inverse)125 static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
126 return kMultTables[!inverse][a];
127 }
128
129 #else
130
GetScale(uint32_t a,int inverse)131 static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
132 return inverse ? (255u << MFIX) / a : a * KINV_255;
133 }
134
135 #endif // USE_TABLES_FOR_ALPHA_MULT
136
// Scales, in place, the three low bytes of each of the 'width' 32-bit pixels
// in 'ptr' by a factor derived from the pixel's top byte (its alpha), using
// GetScale(alpha, inverse). Pixels whose alpha is 255 are left untouched;
// pixels whose alpha is 0 are cleared entirely.
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t pixel = ptr[i];
    uint32_t a, factor, result;
    if (pixel >= 0xff000000u) continue;   // alpha == 255: nothing to do
    if (pixel <= 0x00ffffffu) {           // alpha == 0
      ptr[i] = 0;
      continue;
    }
    a = (pixel >> 24) & 0xff;
    factor = GetScale(a, inverse);
    result = pixel & 0xff000000u;         // alpha byte is kept as is
    // Mult() takes an 8-bit sample: each shift brings one channel into the
    // low byte, and the cast drops everything above it.
    result |= Mult((uint8_t)(pixel >>  0), factor) <<  0;
    result |= Mult((uint8_t)(pixel >>  8), factor) <<  8;
    result |= Mult((uint8_t)(pixel >> 16), factor) << 16;
    ptr[i] = result;
  }
}
156
// Scales, in place, each of the 'width' samples in 'ptr' by a factor derived
// from the matching entry of 'alpha', using GetScale(alpha[i], inverse).
// Samples with alpha 255 are left untouched; samples with alpha 0 are zeroed.
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
                  int width, int inverse) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t a = alpha[i];
    if (a == 255) continue;
    // GetScale() is only reached for 0 < a < 255.
    ptr[i] = (a == 0) ? 0 : Mult(ptr[i], GetScale(a, inverse));
  }
}
172
173 #undef KINV_255
174 #undef HALF
175 #undef MFIX
176
// Row-multiplier entry points. WebPInitAlphaProcessing() points them at
// WebPMultARGBRowC / WebPMultRowC before optionally letting CPU-specific
// init routines override them.
void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
                    int width, int inverse);
180
181 //------------------------------------------------------------------------------
182 // Generic per-plane calls
183
// Applies WebPMultARGBRow() to 'num_rows' consecutive rows. 'ptr' addresses
// the first row and is advanced by 'stride' bytes after each one; every row
// is handed over as an array of 'width' 32-bit pixels.
void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
                      int inverse) {
  int rows_left;
  for (rows_left = num_rows; rows_left > 0; --rows_left) {
    WebPMultARGBRow((uint32_t*)ptr, width, inverse);
    ptr += stride;
  }
}
192
// Applies WebPMultRow() to 'num_rows' consecutive rows of a plane and its
// alpha plane. After each row 'ptr' advances by 'stride' bytes and 'alpha'
// by 'alpha_stride' bytes.
void WebPMultRows(uint8_t* ptr, int stride,
                  const uint8_t* alpha, int alpha_stride,
                  int width, int num_rows, int inverse) {
  int rows_left;
  for (rows_left = num_rows; rows_left > 0; --rows_left) {
    WebPMultRow(ptr, alpha, width, inverse);
    ptr += stride;
    alpha += alpha_stride;
  }
}
203
204 //------------------------------------------------------------------------------
205 // Premultiplied modes
206
// non-dithered modes
208
// Two integer formulations of the division by 255 are available:
//   (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
//   for all 8bit x or a, while
//   (x * a * 65793 + (1 << 23)) >> 24 is bit-wise equivalent to
//   (int)(x * a / 255. + .5).
// The first (truncating) one is the one compiled in.
#if 1     // (int)(x * a / 255.)
#define MULTIPLIER(a)     ((a) * 32897U)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
#else     // (int)(x * a / 255. + .5)
#define MULTIPLIER(a)     ((a) * 65793U)
#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
#endif

// Multiplies, in place, the three color bytes of every 4-byte pixel by that
// pixel's alpha byte (divided by 255, see above). The image is 'w' pixels
// wide and 'h' rows high, with rows 'stride' bytes apart. When 'alpha_first'
// is non-zero the alpha is byte 0 and the colors are bytes 1..3 of each
// pixel; otherwise the colors are bytes 0..2 and the alpha is byte 3.
// Pixels whose alpha is 0xff are left untouched.
static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
                               int w, int h, int stride) {
  const int color_offset = alpha_first ? 1 : 0;
  const int alpha_offset = alpha_first ? 0 : 3;
  int row;
  for (row = 0; row < h; ++row) {
    int col;
    for (col = 0; col < w; ++col) {
      uint8_t* const pixel = rgba + 4 * col;
      uint8_t* const color = pixel + color_offset;
      const uint32_t a = pixel[alpha_offset];
      uint32_t mult;
      if (a == 0xff) continue;
      mult = MULTIPLIER(a);
      color[0] = PREMULTIPLY(color[0], mult);
      color[1] = PREMULTIPLY(color[1], mult);
      color[2] = PREMULTIPLY(color[2], mult);
    }
    rgba += stride;
  }
}
#undef MULTIPLIER
#undef PREMULTIPLY
240
241 // rgbA4444
242
243 #define MULTIPLIER(a) ((a) * 0x1111) // 0x1111 ~= (1 << 16) / 15
244
dither_hi(uint8_t x)245 static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
246 return (x & 0xf0) | (x >> 4);
247 }
248
dither_lo(uint8_t x)249 static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
250 return (x & 0x0f) | (x << 4);
251 }
252
multiply(uint8_t x,uint32_t m)253 static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
254 return (x * m) >> 16;
255 }
256
ApplyAlphaMultiply4444(uint8_t * rgba4444,int w,int h,int stride,int rg_byte_pos)257 static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
258 int w, int h, int stride,
259 int rg_byte_pos /* 0 or 1 */) {
260 while (h-- > 0) {
261 int i;
262 for (i = 0; i < w; ++i) {
263 const uint32_t rg = rgba4444[2 * i + rg_byte_pos];
264 const uint32_t ba = rgba4444[2 * i + (rg_byte_pos ^ 1)];
265 const uint8_t a = ba & 0x0f;
266 const uint32_t mult = MULTIPLIER(a);
267 const uint8_t r = multiply(dither_hi(rg), mult);
268 const uint8_t g = multiply(dither_lo(rg), mult);
269 const uint8_t b = multiply(dither_hi(ba), mult);
270 rgba4444[2 * i + rg_byte_pos] = (r & 0xf0) | ((g >> 4) & 0x0f);
271 rgba4444[2 * i + (rg_byte_pos ^ 1)] = (b & 0xf0) | a;
272 }
273 rgba4444 += stride;
274 }
275 }
276 #undef MULTIPLIER
277
// Forwards to ApplyAlphaMultiply4444(), picking the position of the r/g byte
// at compile time: 1 when WEBP_SWAP_16BIT_CSP is defined, 0 otherwise.
static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
                                   int w, int h, int stride) {
#ifdef WEBP_SWAP_16BIT_CSP
  const int rg_byte_pos = 1;
#else
  const int rg_byte_pos = 0;
#endif
  ApplyAlphaMultiply4444(rgba4444, w, h, stride, rg_byte_pos);
}
286
// Copies a 'width' x 'height' plane of alpha values into every fourth byte of
// 'dst' (dst[0], dst[4], dst[8], ...), row by row. Source rows are
// 'alpha_stride' bytes apart and destination rows 'dst_stride' bytes apart.
// Returns non-zero if at least one copied value differs from 0xff.
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
                         int width, int height,
                         uint8_t* dst, int dst_stride) {
  uint32_t and_of_values = 0xff;  // stays 0xff only if every value is 0xff
  int y;
  for (y = 0; y < height; ++y) {
    int x;
    for (x = 0; x < width; ++x) {
      const uint32_t value = alpha[x];
      and_of_values &= value;
      dst[4 * x] = value;
    }
    dst += dst_stride;
    alpha += alpha_stride;
  }
  return (and_of_values != 0xff);
}
305
// Writes each value of a 'width' x 'height' alpha plane into bits 8..15 of the
// matching 32-bit word of 'dst'; every other bit of the word is set to zero.
// Source rows are 'alpha_stride' bytes apart, destination rows are
// 'dst_stride' 32-bit words apart.
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
                                 int width, int height,
                                 uint32_t* dst, int dst_stride) {
  int y;
  for (y = 0; y < height; ++y) {
    int x;
    for (x = 0; x < width; ++x) {
      dst[x] = alpha[x] << 8;  // leave A/R/B channels zero'd.
    }
    dst += dst_stride;
    alpha += alpha_stride;
  }
}
318
// Gathers every fourth byte of 'argb' (argb[0], argb[4], ...) into the
// 'width' x 'height' plane 'alpha', row by row. Source rows are 'argb_stride'
// bytes apart, destination rows 'alpha_stride' bytes apart.
// Returns non-zero if all the gathered values are equal to 0xff.
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
                        int width, int height,
                        uint8_t* alpha, int alpha_stride) {
  uint8_t and_of_values = 0xff;  // stays 0xff only if every value is 0xff
  int y;
  for (y = 0; y < height; ++y) {
    int x;
    for (x = 0; x < width; ++x) {
      const uint8_t value = argb[4 * x];
      and_of_values &= value;
      alpha[x] = value;
    }
    alpha += alpha_stride;
    argb += argb_stride;
  }
  return (and_of_values == 0xff);
}
336
// Entry points for the alpha helpers. WebPInitAlphaProcessing() points them at
// the plain-C functions of this file before optionally letting CPU-specific
// init routines override them.
void (*WebPApplyAlphaMultiply)(uint8_t* rgba, int alpha_first,
                               int w, int h, int stride);
void (*WebPApplyAlphaMultiply4444)(uint8_t* rgba4444,
                                   int w, int h, int stride);
int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride,
                         int width, int height,
                         uint8_t* dst, int dst_stride);
void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride,
                                 int width, int height,
                                 uint32_t* dst, int dst_stride);
int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride,
                        int width, int height,
                        uint8_t* alpha, int alpha_stride);
342
343 //------------------------------------------------------------------------------
344 // Init function
345
346 extern void WebPInitAlphaProcessingMIPSdspR2(void);
347 extern void WebPInitAlphaProcessingSSE2(void);
348 extern void WebPInitAlphaProcessingSSE41(void);
349
350 static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
351 (VP8CPUInfo)&alpha_processing_last_cpuinfo_used;
352
WebPInitAlphaProcessing(void)353 WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
354 if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;
355
356 WebPMultARGBRow = WebPMultARGBRowC;
357 WebPMultRow = WebPMultRowC;
358 WebPApplyAlphaMultiply = ApplyAlphaMultiply;
359 WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
360 WebPDispatchAlpha = DispatchAlpha;
361 WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
362 WebPExtractAlpha = ExtractAlpha;
363
364 // If defined, use CPUInfo() to overwrite some pointers with faster versions.
365 if (VP8GetCPUInfo != NULL) {
366 #if defined(WEBP_USE_SSE2)
367 if (VP8GetCPUInfo(kSSE2)) {
368 WebPInitAlphaProcessingSSE2();
369 #if defined(WEBP_USE_SSE41)
370 if (VP8GetCPUInfo(kSSE4_1)) {
371 WebPInitAlphaProcessingSSE41();
372 }
373 #endif
374 }
375 #endif
376 #if defined(WEBP_USE_MIPS_DSP_R2)
377 if (VP8GetCPUInfo(kMIPSdspR2)) {
378 WebPInitAlphaProcessingMIPSdspR2();
379 }
380 #endif
381 }
382 alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
383 }
384