1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Image transforms and color space conversion methods for lossless decoder.
11 //
12 // Authors: Vikas Arora (vikaas.arora@gmail.com)
13 // Jyrki Alakuijala (jyrki@google.com)
14 // Urvang Joshi (urvang@google.com)
15
16 #include "./dsp.h"
17
18 // Define the following if target arch is sure to have SSE2
19 // #define WEBP_TARGET_HAS_SSE2
20
21 #if defined(__cplusplus) || defined(c_plusplus)
22 extern "C" {
23 #endif
24
25 #if defined(WEBP_TARGET_HAS_SSE2)
26 #include <emmintrin.h>
27 #endif
28
29 #include <math.h>
30 #include <stdlib.h>
31 #include "./lossless.h"
32 #include "../dec/vp8li.h"
33 #include "./yuv.h"
34
35 #define MAX_DIFF_COST (1e30f)
36
37 // lookup table for small values of log2(int)
38 #define APPROX_LOG_MAX 4096
39 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
40 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
41 0.0000000000000000f, 0.0000000000000000f,
42 1.0000000000000000f, 1.5849625007211560f,
43 2.0000000000000000f, 2.3219280948873621f,
44 2.5849625007211560f, 2.8073549220576041f,
45 3.0000000000000000f, 3.1699250014423121f,
46 3.3219280948873621f, 3.4594316186372973f,
47 3.5849625007211560f, 3.7004397181410921f,
48 3.8073549220576041f, 3.9068905956085187f,
49 4.0000000000000000f, 4.0874628412503390f,
50 4.1699250014423121f, 4.2479275134435852f,
51 4.3219280948873626f, 4.3923174227787606f,
52 4.4594316186372973f, 4.5235619560570130f,
53 4.5849625007211560f, 4.6438561897747243f,
54 4.7004397181410917f, 4.7548875021634682f,
55 4.8073549220576037f, 4.8579809951275718f,
56 4.9068905956085187f, 4.9541963103868749f,
57 5.0000000000000000f, 5.0443941193584533f,
58 5.0874628412503390f, 5.1292830169449663f,
59 5.1699250014423121f, 5.2094533656289501f,
60 5.2479275134435852f, 5.2854022188622487f,
61 5.3219280948873626f, 5.3575520046180837f,
62 5.3923174227787606f, 5.4262647547020979f,
63 5.4594316186372973f, 5.4918530963296747f,
64 5.5235619560570130f, 5.5545888516776376f,
65 5.5849625007211560f, 5.6147098441152083f,
66 5.6438561897747243f, 5.6724253419714951f,
67 5.7004397181410917f, 5.7279204545631987f,
68 5.7548875021634682f, 5.7813597135246599f,
69 5.8073549220576037f, 5.8328900141647412f,
70 5.8579809951275718f, 5.8826430493618415f,
71 5.9068905956085187f, 5.9307373375628866f,
72 5.9541963103868749f, 5.9772799234999167f,
73 6.0000000000000000f, 6.0223678130284543f,
74 6.0443941193584533f, 6.0660891904577720f,
75 6.0874628412503390f, 6.1085244567781691f,
76 6.1292830169449663f, 6.1497471195046822f,
77 6.1699250014423121f, 6.1898245588800175f,
78 6.2094533656289501f, 6.2288186904958804f,
79 6.2479275134435852f, 6.2667865406949010f,
80 6.2854022188622487f, 6.3037807481771030f,
81 6.3219280948873626f, 6.3398500028846243f,
82 6.3575520046180837f, 6.3750394313469245f,
83 6.3923174227787606f, 6.4093909361377017f,
84 6.4262647547020979f, 6.4429434958487279f,
85 6.4594316186372973f, 6.4757334309663976f,
86 6.4918530963296747f, 6.5077946401986963f,
87 6.5235619560570130f, 6.5391588111080309f,
88 6.5545888516776376f, 6.5698556083309478f,
89 6.5849625007211560f, 6.5999128421871278f,
90 6.6147098441152083f, 6.6293566200796094f,
91 6.6438561897747243f, 6.6582114827517946f,
92 6.6724253419714951f, 6.6865005271832185f,
93 6.7004397181410917f, 6.7142455176661224f,
94 6.7279204545631987f, 6.7414669864011464f,
95 6.7548875021634682f, 6.7681843247769259f,
96 6.7813597135246599f, 6.7944158663501061f,
97 6.8073549220576037f, 6.8201789624151878f,
98 6.8328900141647412f, 6.8454900509443747f,
99 6.8579809951275718f, 6.8703647195834047f,
100 6.8826430493618415f, 6.8948177633079437f,
101 6.9068905956085187f, 6.9188632372745946f,
102 6.9307373375628866f, 6.9425145053392398f,
103 6.9541963103868749f, 6.9657842846620869f,
104 6.9772799234999167f, 6.9886846867721654f,
105 7.0000000000000000f, 7.0112272554232539f,
106 7.0223678130284543f, 7.0334230015374501f,
107 7.0443941193584533f, 7.0552824355011898f,
108 7.0660891904577720f, 7.0768155970508308f,
109 7.0874628412503390f, 7.0980320829605263f,
110 7.1085244567781691f, 7.1189410727235076f,
111 7.1292830169449663f, 7.1395513523987936f,
112 7.1497471195046822f, 7.1598713367783890f,
113 7.1699250014423121f, 7.1799090900149344f,
114 7.1898245588800175f, 7.1996723448363644f,
115 7.2094533656289501f, 7.2191685204621611f,
116 7.2288186904958804f, 7.2384047393250785f,
117 7.2479275134435852f, 7.2573878426926521f,
118 7.2667865406949010f, 7.2761244052742375f,
119 7.2854022188622487f, 7.2946207488916270f,
120 7.3037807481771030f, 7.3128829552843557f,
121 7.3219280948873626f, 7.3309168781146167f,
122 7.3398500028846243f, 7.3487281542310771f,
123 7.3575520046180837f, 7.3663222142458160f,
124 7.3750394313469245f, 7.3837042924740519f,
125 7.3923174227787606f, 7.4008794362821843f,
126 7.4093909361377017f, 7.4178525148858982f,
127 7.4262647547020979f, 7.4346282276367245f,
128 7.4429434958487279f, 7.4512111118323289f,
129 7.4594316186372973f, 7.4676055500829976f,
130 7.4757334309663976f, 7.4838157772642563f,
131 7.4918530963296747f, 7.4998458870832056f,
132 7.5077946401986963f, 7.5156998382840427f,
133 7.5235619560570130f, 7.5313814605163118f,
134 7.5391588111080309f, 7.5468944598876364f,
135 7.5545888516776376f, 7.5622424242210728f,
136 7.5698556083309478f, 7.5774288280357486f,
137 7.5849625007211560f, 7.5924570372680806f,
138 7.5999128421871278f, 7.6073303137496104f,
139 7.6147098441152083f, 7.6220518194563764f,
140 7.6293566200796094f, 7.6366246205436487f,
141 7.6438561897747243f, 7.6510516911789281f,
142 7.6582114827517946f, 7.6653359171851764f,
143 7.6724253419714951f, 7.6794800995054464f,
144 7.6865005271832185f, 7.6934869574993252f,
145 7.7004397181410917f, 7.7073591320808825f,
146 7.7142455176661224f, 7.7210991887071855f,
147 7.7279204545631987f, 7.7347096202258383f,
148 7.7414669864011464f, 7.7481928495894605f,
149 7.7548875021634682f, 7.7615512324444795f,
150 7.7681843247769259f, 7.7747870596011736f,
151 7.7813597135246599f, 7.7879025593914317f,
152 7.7944158663501061f, 7.8008998999203047f,
153 7.8073549220576037f, 7.8137811912170374f,
154 7.8201789624151878f, 7.8265484872909150f,
155 7.8328900141647412f, 7.8392037880969436f,
156 7.8454900509443747f, 7.8517490414160571f,
157 7.8579809951275718f, 7.8641861446542797f,
158 7.8703647195834047f, 7.8765169465649993f,
159 7.8826430493618415f, 7.8887432488982591f,
160 7.8948177633079437f, 7.9008668079807486f,
161 7.9068905956085187f, 7.9128893362299619f,
162 7.9188632372745946f, 7.9248125036057812f,
163 7.9307373375628866f, 7.9366379390025709f,
164 7.9425145053392398f, 7.9483672315846778f,
165 7.9541963103868749f, 7.9600019320680805f,
166 7.9657842846620869f, 7.9715435539507719f,
167 7.9772799234999167f, 7.9829935746943103f,
168 7.9886846867721654f, 7.9943534368588577f
169 };
170
171 const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
172 0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f,
173 8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f,
174 24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f,
175 43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f,
176 64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f,
177 86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f,
178 110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
179 134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
180 160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
181 186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
182 212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
183 240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
184 268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
185 296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
186 325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
187 354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
188 384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
189 413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
190 444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
191 474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
192 505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
193 536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
194 568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
195 600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
196 632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
197 664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
198 696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
199 729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
200 762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
201 795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
202 828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
203 862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
204 896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
205 929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
206 963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
207 998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
208 1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
209 1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
210 1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
211 1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
212 1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
213 1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
214 1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
215 1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
216 1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
217 1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
218 1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
219 1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
220 1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
221 1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
222 1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
223 1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
224 1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
225 1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
226 1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
227 1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
228 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
229 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
230 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
231 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
232 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
233 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
234 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
235 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
236 };
237
VP8LFastSLog2Slow(int v)238 float VP8LFastSLog2Slow(int v) {
239 assert(v >= LOG_LOOKUP_IDX_MAX);
240 if (v < APPROX_LOG_MAX) {
241 int log_cnt = 0;
242 const float v_f = (float)v;
243 while (v >= LOG_LOOKUP_IDX_MAX) {
244 ++log_cnt;
245 v = v >> 1;
246 }
247 return v_f * (kLog2Table[v] + log_cnt);
248 } else {
249 return (float)(LOG_2_RECIPROCAL * v * log((double)v));
250 }
251 }
252
VP8LFastLog2Slow(int v)253 float VP8LFastLog2Slow(int v) {
254 assert(v >= LOG_LOOKUP_IDX_MAX);
255 if (v < APPROX_LOG_MAX) {
256 int log_cnt = 0;
257 while (v >= LOG_LOOKUP_IDX_MAX) {
258 ++log_cnt;
259 v = v >> 1;
260 }
261 return kLog2Table[v] + log_cnt;
262 } else {
263 return (float)(LOG_2_RECIPROCAL * log((double)v));
264 }
265 }
266
267 //------------------------------------------------------------------------------
268 // Image transforms.
269
270 // In-place sum of each component with mod 256.
AddPixelsEq(uint32_t * a,uint32_t b)271 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
272 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
273 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
274 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
275 }
276
Average2(uint32_t a0,uint32_t a1)277 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
278 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
279 }
280
Average3(uint32_t a0,uint32_t a1,uint32_t a2)281 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
282 return Average2(Average2(a0, a2), a1);
283 }
284
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)285 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
286 uint32_t a2, uint32_t a3) {
287 return Average2(Average2(a0, a1), Average2(a2, a3));
288 }
289
290 #if defined(WEBP_TARGET_HAS_SSE2)
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)291 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
292 uint32_t c2) {
293 const __m128i zero = _mm_setzero_si128();
294 const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
295 const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
296 const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
297 const __m128i V1 = _mm_add_epi16(C0, C1);
298 const __m128i V2 = _mm_sub_epi16(V1, C2);
299 const __m128i b = _mm_packus_epi16(V2, V2);
300 const uint32_t output = _mm_cvtsi128_si32(b);
301 return output;
302 }
303
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)304 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
305 uint32_t c2) {
306 const uint32_t ave = Average2(c0, c1);
307 const __m128i zero = _mm_setzero_si128();
308 const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero);
309 const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
310 const __m128i A1 = _mm_sub_epi16(A0, B0);
311 const __m128i BgtA = _mm_cmpgt_epi16(B0, A0);
312 const __m128i A2 = _mm_sub_epi16(A1, BgtA);
313 const __m128i A3 = _mm_srai_epi16(A2, 1);
314 const __m128i A4 = _mm_add_epi16(A0, A3);
315 const __m128i A5 = _mm_packus_epi16(A4, A4);
316 const uint32_t output = _mm_cvtsi128_si32(A5);
317 return output;
318 }
319
Select(uint32_t a,uint32_t b,uint32_t c)320 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
321 int pa_minus_pb;
322 const __m128i zero = _mm_setzero_si128();
323 const __m128i A0 = _mm_cvtsi32_si128(a);
324 const __m128i B0 = _mm_cvtsi32_si128(b);
325 const __m128i C0 = _mm_cvtsi32_si128(c);
326 const __m128i AC0 = _mm_subs_epu8(A0, C0);
327 const __m128i CA0 = _mm_subs_epu8(C0, A0);
328 const __m128i BC0 = _mm_subs_epu8(B0, C0);
329 const __m128i CB0 = _mm_subs_epu8(C0, B0);
330 const __m128i AC = _mm_or_si128(AC0, CA0);
331 const __m128i BC = _mm_or_si128(BC0, CB0);
332 const __m128i pa = _mm_unpacklo_epi8(AC, zero); // |a - c|
333 const __m128i pb = _mm_unpacklo_epi8(BC, zero); // |b - c|
334 const __m128i diff = _mm_sub_epi16(pb, pa);
335 {
336 int16_t out[8];
337 _mm_storeu_si128((__m128i*)out, diff);
338 pa_minus_pb = out[0] + out[1] + out[2] + out[3];
339 }
340 return (pa_minus_pb <= 0) ? a : b;
341 }
342
343 #else
344
Clip255(uint32_t a)345 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
346 if (a < 256) {
347 return a;
348 }
349 // return 0, when a is a negative integer.
350 // return 255, when a is positive.
351 return ~a >> 24;
352 }
353
AddSubtractComponentFull(int a,int b,int c)354 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
355 return Clip255(a + b - c);
356 }
357
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)358 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
359 uint32_t c2) {
360 const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
361 const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
362 (c1 >> 16) & 0xff,
363 (c2 >> 16) & 0xff);
364 const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
365 (c1 >> 8) & 0xff,
366 (c2 >> 8) & 0xff);
367 const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
368 return (a << 24) | (r << 16) | (g << 8) | b;
369 }
370
AddSubtractComponentHalf(int a,int b)371 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
372 return Clip255(a + (a - b) / 2);
373 }
374
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)375 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
376 uint32_t c2) {
377 const uint32_t ave = Average2(c0, c1);
378 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
379 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
380 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
381 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
382 return (a << 24) | (r << 16) | (g << 8) | b;
383 }
384
Sub3(int a,int b,int c)385 static WEBP_INLINE int Sub3(int a, int b, int c) {
386 const int pb = b - c;
387 const int pa = a - c;
388 return abs(pb) - abs(pa);
389 }
390
Select(uint32_t a,uint32_t b,uint32_t c)391 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
392 const int pa_minus_pb =
393 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
394 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
395 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
396 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
397 return (pa_minus_pb <= 0) ? a : b;
398 }
399 #endif
400
401 //------------------------------------------------------------------------------
402 // Predictors
403
Predictor0(uint32_t left,const uint32_t * const top)404 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
405 (void)top;
406 (void)left;
407 return ARGB_BLACK;
408 }
Predictor1(uint32_t left,const uint32_t * const top)409 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
410 (void)top;
411 return left;
412 }
Predictor2(uint32_t left,const uint32_t * const top)413 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
414 (void)left;
415 return top[0];
416 }
Predictor3(uint32_t left,const uint32_t * const top)417 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
418 (void)left;
419 return top[1];
420 }
Predictor4(uint32_t left,const uint32_t * const top)421 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
422 (void)left;
423 return top[-1];
424 }
Predictor5(uint32_t left,const uint32_t * const top)425 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
426 const uint32_t pred = Average3(left, top[0], top[1]);
427 return pred;
428 }
Predictor6(uint32_t left,const uint32_t * const top)429 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
430 const uint32_t pred = Average2(left, top[-1]);
431 return pred;
432 }
Predictor7(uint32_t left,const uint32_t * const top)433 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
434 const uint32_t pred = Average2(left, top[0]);
435 return pred;
436 }
Predictor8(uint32_t left,const uint32_t * const top)437 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
438 const uint32_t pred = Average2(top[-1], top[0]);
439 (void)left;
440 return pred;
441 }
Predictor9(uint32_t left,const uint32_t * const top)442 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
443 const uint32_t pred = Average2(top[0], top[1]);
444 (void)left;
445 return pred;
446 }
Predictor10(uint32_t left,const uint32_t * const top)447 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
448 const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
449 return pred;
450 }
Predictor11(uint32_t left,const uint32_t * const top)451 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
452 const uint32_t pred = Select(top[0], left, top[-1]);
453 return pred;
454 }
Predictor12(uint32_t left,const uint32_t * const top)455 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
456 const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
457 return pred;
458 }
Predictor13(uint32_t left,const uint32_t * const top)459 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
460 const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
461 return pred;
462 }
463
464 typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
465 static const PredictorFunc kPredictors[16] = {
466 Predictor0, Predictor1, Predictor2, Predictor3,
467 Predictor4, Predictor5, Predictor6, Predictor7,
468 Predictor8, Predictor9, Predictor10, Predictor11,
469 Predictor12, Predictor13,
470 Predictor0, Predictor0 // <- padding security sentinels
471 };
472
473 // TODO(vikasa): Replace 256 etc with defines.
PredictionCostSpatial(const int * counts,int weight_0,double exp_val)474 static float PredictionCostSpatial(const int* counts,
475 int weight_0, double exp_val) {
476 const int significant_symbols = 16;
477 const double exp_decay_factor = 0.6;
478 double bits = weight_0 * counts[0];
479 int i;
480 for (i = 1; i < significant_symbols; ++i) {
481 bits += exp_val * (counts[i] + counts[256 - i]);
482 exp_val *= exp_decay_factor;
483 }
484 return (float)(-0.1 * bits);
485 }
486
487 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
CombinedShannonEntropy(const int * const X,const int * const Y,int n)488 static float CombinedShannonEntropy(const int* const X,
489 const int* const Y, int n) {
490 int i;
491 double retval = 0.;
492 int sumX = 0, sumXY = 0;
493 for (i = 0; i < n; ++i) {
494 const int x = X[i];
495 const int xy = X[i] + Y[i];
496 if (x != 0) {
497 sumX += x;
498 retval -= VP8LFastSLog2(x);
499 }
500 if (xy != 0) {
501 sumXY += xy;
502 retval -= VP8LFastSLog2(xy);
503 }
504 }
505 retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
506 return (float)retval;
507 }
508
PredictionCostSpatialHistogram(int accumulated[4][256],int tile[4][256])509 static float PredictionCostSpatialHistogram(int accumulated[4][256],
510 int tile[4][256]) {
511 int i;
512 double retval = 0;
513 for (i = 0; i < 4; ++i) {
514 const double kExpValue = 0.94;
515 retval += PredictionCostSpatial(tile[i], 1, kExpValue);
516 retval += CombinedShannonEntropy(tile[i], accumulated[i], 256);
517 }
518 return (float)retval;
519 }
520
GetBestPredictorForTile(int width,int height,int tile_x,int tile_y,int bits,int accumulated[4][256],const uint32_t * const argb_scratch)521 static int GetBestPredictorForTile(int width, int height,
522 int tile_x, int tile_y, int bits,
523 int accumulated[4][256],
524 const uint32_t* const argb_scratch) {
525 const int kNumPredModes = 14;
526 const int col_start = tile_x << bits;
527 const int row_start = tile_y << bits;
528 const int tile_size = 1 << bits;
529 const int ymax = (tile_size <= height - row_start) ?
530 tile_size : height - row_start;
531 const int xmax = (tile_size <= width - col_start) ?
532 tile_size : width - col_start;
533 int histo[4][256];
534 float best_diff = MAX_DIFF_COST;
535 int best_mode = 0;
536
537 int mode;
538 for (mode = 0; mode < kNumPredModes; ++mode) {
539 const uint32_t* current_row = argb_scratch;
540 const PredictorFunc pred_func = kPredictors[mode];
541 float cur_diff;
542 int y;
543 memset(&histo[0][0], 0, sizeof(histo));
544 for (y = 0; y < ymax; ++y) {
545 int x;
546 const int row = row_start + y;
547 const uint32_t* const upper_row = current_row;
548 current_row = upper_row + width;
549 for (x = 0; x < xmax; ++x) {
550 const int col = col_start + x;
551 uint32_t predict;
552 uint32_t predict_diff;
553 if (row == 0) {
554 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
555 } else if (col == 0) {
556 predict = upper_row[col]; // Top.
557 } else {
558 predict = pred_func(current_row[col - 1], upper_row + col);
559 }
560 predict_diff = VP8LSubPixels(current_row[col], predict);
561 ++histo[0][predict_diff >> 24];
562 ++histo[1][((predict_diff >> 16) & 0xff)];
563 ++histo[2][((predict_diff >> 8) & 0xff)];
564 ++histo[3][(predict_diff & 0xff)];
565 }
566 }
567 cur_diff = PredictionCostSpatialHistogram(accumulated, histo);
568 if (cur_diff < best_diff) {
569 best_diff = cur_diff;
570 best_mode = mode;
571 }
572 }
573
574 return best_mode;
575 }
576
CopyTileWithPrediction(int width,int height,int tile_x,int tile_y,int bits,int mode,const uint32_t * const argb_scratch,uint32_t * const argb)577 static void CopyTileWithPrediction(int width, int height,
578 int tile_x, int tile_y, int bits, int mode,
579 const uint32_t* const argb_scratch,
580 uint32_t* const argb) {
581 const int col_start = tile_x << bits;
582 const int row_start = tile_y << bits;
583 const int tile_size = 1 << bits;
584 const int ymax = (tile_size <= height - row_start) ?
585 tile_size : height - row_start;
586 const int xmax = (tile_size <= width - col_start) ?
587 tile_size : width - col_start;
588 const PredictorFunc pred_func = kPredictors[mode];
589 const uint32_t* current_row = argb_scratch;
590
591 int y;
592 for (y = 0; y < ymax; ++y) {
593 int x;
594 const int row = row_start + y;
595 const uint32_t* const upper_row = current_row;
596 current_row = upper_row + width;
597 for (x = 0; x < xmax; ++x) {
598 const int col = col_start + x;
599 const int pix = row * width + col;
600 uint32_t predict;
601 if (row == 0) {
602 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
603 } else if (col == 0) {
604 predict = upper_row[col]; // Top.
605 } else {
606 predict = pred_func(current_row[col - 1], upper_row + col);
607 }
608 argb[pix] = VP8LSubPixels(current_row[col], predict);
609 }
610 }
611 }
612
VP8LResidualImage(int width,int height,int bits,uint32_t * const argb,uint32_t * const argb_scratch,uint32_t * const image)613 void VP8LResidualImage(int width, int height, int bits,
614 uint32_t* const argb, uint32_t* const argb_scratch,
615 uint32_t* const image) {
616 const int max_tile_size = 1 << bits;
617 const int tiles_per_row = VP8LSubSampleSize(width, bits);
618 const int tiles_per_col = VP8LSubSampleSize(height, bits);
619 uint32_t* const upper_row = argb_scratch;
620 uint32_t* const current_tile_rows = argb_scratch + width;
621 int tile_y;
622 int histo[4][256];
623 memset(histo, 0, sizeof(histo));
624 for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
625 const int tile_y_offset = tile_y * max_tile_size;
626 const int this_tile_height =
627 (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
628 int tile_x;
629 if (tile_y > 0) {
630 memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
631 width * sizeof(*upper_row));
632 }
633 memcpy(current_tile_rows, &argb[tile_y_offset * width],
634 this_tile_height * width * sizeof(*current_tile_rows));
635 for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
636 int pred;
637 int y;
638 const int tile_x_offset = tile_x * max_tile_size;
639 int all_x_max = tile_x_offset + max_tile_size;
640 if (all_x_max > width) {
641 all_x_max = width;
642 }
643 pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
644 argb_scratch);
645 image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
646 CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
647 argb_scratch, argb);
648 for (y = 0; y < max_tile_size; ++y) {
649 int ix;
650 int all_x;
651 int all_y = tile_y_offset + y;
652 if (all_y >= height) {
653 break;
654 }
655 ix = all_y * width + tile_x_offset;
656 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
657 const uint32_t a = argb[ix];
658 ++histo[0][a >> 24];
659 ++histo[1][((a >> 16) & 0xff)];
660 ++histo[2][((a >> 8) & 0xff)];
661 ++histo[3][(a & 0xff)];
662 }
663 }
664 }
665 }
666 }
667
668 // Inverse prediction.
PredictorInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)669 static void PredictorInverseTransform(const VP8LTransform* const transform,
670 int y_start, int y_end, uint32_t* data) {
671 const int width = transform->xsize_;
672 if (y_start == 0) { // First Row follows the L (mode=1) mode.
673 int x;
674 const uint32_t pred0 = Predictor0(data[-1], NULL);
675 AddPixelsEq(data, pred0);
676 for (x = 1; x < width; ++x) {
677 const uint32_t pred1 = Predictor1(data[x - 1], NULL);
678 AddPixelsEq(data + x, pred1);
679 }
680 data += width;
681 ++y_start;
682 }
683
684 {
685 int y = y_start;
686 const int mask = (1 << transform->bits_) - 1;
687 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
688 const uint32_t* pred_mode_base =
689 transform->data_ + (y >> transform->bits_) * tiles_per_row;
690
691 while (y < y_end) {
692 int x;
693 const uint32_t pred2 = Predictor2(data[-1], data - width);
694 const uint32_t* pred_mode_src = pred_mode_base;
695 PredictorFunc pred_func;
696
697 // First pixel follows the T (mode=2) mode.
698 AddPixelsEq(data, pred2);
699
700 // .. the rest:
701 pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
702 for (x = 1; x < width; ++x) {
703 uint32_t pred;
704 if ((x & mask) == 0) { // start of tile. Read predictor function.
705 pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
706 }
707 pred = pred_func(data[x - 1], data + x - width);
708 AddPixelsEq(data + x, pred);
709 }
710 data += width;
711 ++y;
712 if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
713 pred_mode_base += tiles_per_row;
714 }
715 }
716 }
717 }
718
VP8LSubtractGreenFromBlueAndRed(uint32_t * argb_data,int num_pixs)719 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
720 int i = 0;
721 #if defined(WEBP_TARGET_HAS_SSE2)
722 const __m128i mask = _mm_set1_epi32(0x0000ff00);
723 for (; i + 4 < num_pixs; i += 4) {
724 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
725 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
726 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
727 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
728 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
729 const __m128i out = _mm_sub_epi8(in, in_0g0g);
730 _mm_storeu_si128((__m128i*)&argb_data[i], out);
731 }
732 // fallthrough and finish off with plain-C
733 #endif
734 for (; i < num_pixs; ++i) {
735 const uint32_t argb = argb_data[i];
736 const uint32_t green = (argb >> 8) & 0xff;
737 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
738 const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
739 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
740 }
741 }
742
743 // Add green to blue and red channels (i.e. perform the inverse transform of
744 // 'subtract green').
AddGreenToBlueAndRed(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)745 static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
746 int y_start, int y_end, uint32_t* data) {
747 const int width = transform->xsize_;
748 const uint32_t* const data_end = data + (y_end - y_start) * width;
749 #if defined(WEBP_TARGET_HAS_SSE2)
750 const __m128i mask = _mm_set1_epi32(0x0000ff00);
751 for (; data + 4 < data_end; data += 4) {
752 const __m128i in = _mm_loadu_si128((__m128i*)data);
753 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
754 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
755 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
756 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
757 const __m128i out = _mm_add_epi8(in, in_0g0g);
758 _mm_storeu_si128((__m128i*)data, out);
759 }
760 // fallthrough and finish off with plain-C
761 #endif
762 while (data < data_end) {
763 const uint32_t argb = *data;
764 const uint32_t green = ((argb >> 8) & 0xff);
765 uint32_t red_blue = (argb & 0x00ff00ffu);
766 red_blue += (green << 16) | green;
767 red_blue &= 0x00ff00ffu;
768 *data++ = (argb & 0xff00ff00u) | red_blue;
769 }
770 }
771
772 typedef struct {
773 // Note: the members are uint8_t, so that any negative values are
774 // automatically converted to "mod 256" values.
775 uint8_t green_to_red_;
776 uint8_t green_to_blue_;
777 uint8_t red_to_blue_;
778 } Multipliers;
779
MultipliersClear(Multipliers * m)780 static WEBP_INLINE void MultipliersClear(Multipliers* m) {
781 m->green_to_red_ = 0;
782 m->green_to_blue_ = 0;
783 m->red_to_blue_ = 0;
784 }
785
ColorTransformDelta(int8_t color_pred,int8_t color)786 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
787 int8_t color) {
788 return (uint32_t)((int)(color_pred) * color) >> 5;
789 }
790
ColorCodeToMultipliers(uint32_t color_code,Multipliers * const m)791 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
792 Multipliers* const m) {
793 m->green_to_red_ = (color_code >> 0) & 0xff;
794 m->green_to_blue_ = (color_code >> 8) & 0xff;
795 m->red_to_blue_ = (color_code >> 16) & 0xff;
796 }
797
MultipliersToColorCode(Multipliers * const m)798 static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) {
799 return 0xff000000u |
800 ((uint32_t)(m->red_to_blue_) << 16) |
801 ((uint32_t)(m->green_to_blue_) << 8) |
802 m->green_to_red_;
803 }
804
TransformColor(const Multipliers * const m,uint32_t argb,int inverse)805 static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
806 uint32_t argb, int inverse) {
807 const uint32_t green = argb >> 8;
808 const uint32_t red = argb >> 16;
809 uint32_t new_red = red;
810 uint32_t new_blue = argb;
811
812 if (inverse) {
813 new_red += ColorTransformDelta(m->green_to_red_, green);
814 new_red &= 0xff;
815 new_blue += ColorTransformDelta(m->green_to_blue_, green);
816 new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
817 new_blue &= 0xff;
818 } else {
819 new_red -= ColorTransformDelta(m->green_to_red_, green);
820 new_red &= 0xff;
821 new_blue -= ColorTransformDelta(m->green_to_blue_, green);
822 new_blue -= ColorTransformDelta(m->red_to_blue_, red);
823 new_blue &= 0xff;
824 }
825 return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
826 }
827
TransformColorRed(uint8_t green_to_red,uint32_t argb)828 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
829 uint32_t argb) {
830 const uint32_t green = argb >> 8;
831 uint32_t new_red = argb >> 16;
832 new_red -= ColorTransformDelta(green_to_red, green);
833 return (new_red & 0xff);
834 }
835
TransformColorBlue(uint8_t green_to_blue,uint8_t red_to_blue,uint32_t argb)836 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
837 uint8_t red_to_blue,
838 uint32_t argb) {
839 const uint32_t green = argb >> 8;
840 const uint32_t red = argb >> 16;
841 uint8_t new_blue = argb;
842 new_blue -= ColorTransformDelta(green_to_blue, green);
843 new_blue -= ColorTransformDelta(red_to_blue, red);
844 return (new_blue & 0xff);
845 }
846
SkipRepeatedPixels(const uint32_t * const argb,int ix,int xsize)847 static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
848 int ix, int xsize) {
849 const uint32_t v = argb[ix];
850 if (ix >= xsize + 3) {
851 if (v == argb[ix - xsize] &&
852 argb[ix - 1] == argb[ix - xsize - 1] &&
853 argb[ix - 2] == argb[ix - xsize - 2] &&
854 argb[ix - 3] == argb[ix - xsize - 3]) {
855 return 1;
856 }
857 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
858 } else if (ix >= 3) {
859 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
860 }
861 return 0;
862 }
863
PredictionCostCrossColor(const int accumulated[256],const int counts[256])864 static float PredictionCostCrossColor(const int accumulated[256],
865 const int counts[256]) {
866 // Favor low entropy, locally and globally.
867 // Favor small absolute values for PredictionCostSpatial
868 static const double kExpValue = 2.4;
869 return CombinedShannonEntropy(counts, accumulated, 256) +
870 PredictionCostSpatial(counts, 3, kExpValue);
871 }
872
GetBestColorTransformForTile(int tile_x,int tile_y,int bits,Multipliers prevX,Multipliers prevY,int step,int xsize,int ysize,int * accumulated_red_histo,int * accumulated_blue_histo,const uint32_t * const argb)873 static Multipliers GetBestColorTransformForTile(
874 int tile_x, int tile_y, int bits,
875 Multipliers prevX,
876 Multipliers prevY,
877 int step, int xsize, int ysize,
878 int* accumulated_red_histo,
879 int* accumulated_blue_histo,
880 const uint32_t* const argb) {
881 float best_diff = MAX_DIFF_COST;
882 float cur_diff;
883 const int halfstep = step / 2;
884 const int max_tile_size = 1 << bits;
885 const int tile_y_offset = tile_y * max_tile_size;
886 const int tile_x_offset = tile_x * max_tile_size;
887 int green_to_red;
888 int green_to_blue;
889 int red_to_blue;
890 int all_x_max = tile_x_offset + max_tile_size;
891 int all_y_max = tile_y_offset + max_tile_size;
892 Multipliers best_tx;
893 MultipliersClear(&best_tx);
894 if (all_x_max > xsize) {
895 all_x_max = xsize;
896 }
897 if (all_y_max > ysize) {
898 all_y_max = ysize;
899 }
900
901 for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
902 int histo[256] = { 0 };
903 int all_y;
904
905 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
906 int ix = all_y * xsize + tile_x_offset;
907 int all_x;
908 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
909 if (SkipRepeatedPixels(argb, ix, xsize)) {
910 continue;
911 }
912 ++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
913 }
914 }
915 cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
916 if ((uint8_t)green_to_red == prevX.green_to_red_) {
917 cur_diff -= 3; // favor keeping the areas locally similar
918 }
919 if ((uint8_t)green_to_red == prevY.green_to_red_) {
920 cur_diff -= 3; // favor keeping the areas locally similar
921 }
922 if (green_to_red == 0) {
923 cur_diff -= 3;
924 }
925 if (cur_diff < best_diff) {
926 best_diff = cur_diff;
927 best_tx.green_to_red_ = green_to_red;
928 }
929 }
930 best_diff = MAX_DIFF_COST;
931 for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
932 for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
933 int all_y;
934 int histo[256] = { 0 };
935 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
936 int all_x;
937 int ix = all_y * xsize + tile_x_offset;
938 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
939 if (SkipRepeatedPixels(argb, ix, xsize)) {
940 continue;
941 }
942 ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
943 }
944 }
945 cur_diff =
946 PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
947 if ((uint8_t)green_to_blue == prevX.green_to_blue_) {
948 cur_diff -= 3; // favor keeping the areas locally similar
949 }
950 if ((uint8_t)green_to_blue == prevY.green_to_blue_) {
951 cur_diff -= 3; // favor keeping the areas locally similar
952 }
953 if ((uint8_t)red_to_blue == prevX.red_to_blue_) {
954 cur_diff -= 3; // favor keeping the areas locally similar
955 }
956 if ((uint8_t)red_to_blue == prevY.red_to_blue_) {
957 cur_diff -= 3; // favor keeping the areas locally similar
958 }
959 if (green_to_blue == 0) {
960 cur_diff -= 3;
961 }
962 if (red_to_blue == 0) {
963 cur_diff -= 3;
964 }
965 if (cur_diff < best_diff) {
966 best_diff = cur_diff;
967 best_tx.green_to_blue_ = green_to_blue;
968 best_tx.red_to_blue_ = red_to_blue;
969 }
970 }
971 }
972 return best_tx;
973 }
974
CopyTileWithColorTransform(int xsize,int ysize,int tile_x,int tile_y,int bits,Multipliers color_transform,uint32_t * const argb)975 static void CopyTileWithColorTransform(int xsize, int ysize,
976 int tile_x, int tile_y, int bits,
977 Multipliers color_transform,
978 uint32_t* const argb) {
979 int y;
980 int xscan = 1 << bits;
981 int yscan = 1 << bits;
982 tile_x <<= bits;
983 tile_y <<= bits;
984 if (xscan > xsize - tile_x) {
985 xscan = xsize - tile_x;
986 }
987 if (yscan > ysize - tile_y) {
988 yscan = ysize - tile_y;
989 }
990 yscan += tile_y;
991 for (y = tile_y; y < yscan; ++y) {
992 int ix = y * xsize + tile_x;
993 const int end_ix = ix + xscan;
994 for (; ix < end_ix; ++ix) {
995 argb[ix] = TransformColor(&color_transform, argb[ix], 0);
996 }
997 }
998 }
999
VP8LColorSpaceTransform(int width,int height,int bits,int step,uint32_t * const argb,uint32_t * image)1000 void VP8LColorSpaceTransform(int width, int height, int bits, int step,
1001 uint32_t* const argb, uint32_t* image) {
1002 const int max_tile_size = 1 << bits;
1003 int tile_xsize = VP8LSubSampleSize(width, bits);
1004 int tile_ysize = VP8LSubSampleSize(height, bits);
1005 int accumulated_red_histo[256] = { 0 };
1006 int accumulated_blue_histo[256] = { 0 };
1007 int tile_y;
1008 int tile_x;
1009 Multipliers prevX;
1010 Multipliers prevY;
1011 MultipliersClear(&prevY);
1012 MultipliersClear(&prevX);
1013 for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
1014 for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
1015 Multipliers color_transform;
1016 int all_x_max;
1017 int y;
1018 const int tile_y_offset = tile_y * max_tile_size;
1019 const int tile_x_offset = tile_x * max_tile_size;
1020 if (tile_y != 0) {
1021 ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
1022 ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x],
1023 &prevY);
1024 } else if (tile_x != 0) {
1025 ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
1026 }
1027 color_transform =
1028 GetBestColorTransformForTile(tile_x, tile_y, bits,
1029 prevX, prevY,
1030 step, width, height,
1031 &accumulated_red_histo[0],
1032 &accumulated_blue_histo[0],
1033 argb);
1034 image[tile_y * tile_xsize + tile_x] =
1035 MultipliersToColorCode(&color_transform);
1036 CopyTileWithColorTransform(width, height, tile_x, tile_y, bits,
1037 color_transform, argb);
1038
1039 // Gather accumulated histogram data.
1040 all_x_max = tile_x_offset + max_tile_size;
1041 if (all_x_max > width) {
1042 all_x_max = width;
1043 }
1044 for (y = 0; y < max_tile_size; ++y) {
1045 int ix;
1046 int all_x;
1047 int all_y = tile_y_offset + y;
1048 if (all_y >= height) {
1049 break;
1050 }
1051 ix = all_y * width + tile_x_offset;
1052 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
1053 if (ix >= 2 &&
1054 argb[ix] == argb[ix - 2] &&
1055 argb[ix] == argb[ix - 1]) {
1056 continue; // repeated pixels are handled by backward references
1057 }
1058 if (ix >= width + 2 &&
1059 argb[ix - 2] == argb[ix - width - 2] &&
1060 argb[ix - 1] == argb[ix - width - 1] &&
1061 argb[ix] == argb[ix - width]) {
1062 continue; // repeated pixels are handled by backward references
1063 }
1064 ++accumulated_red_histo[(argb[ix] >> 16) & 0xff];
1065 ++accumulated_blue_histo[argb[ix] & 0xff];
1066 }
1067 }
1068 }
1069 }
1070 }
1071
1072 // Color space inverse transform.
ColorSpaceInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)1073 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
1074 int y_start, int y_end, uint32_t* data) {
1075 const int width = transform->xsize_;
1076 const int mask = (1 << transform->bits_) - 1;
1077 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
1078 int y = y_start;
1079 const uint32_t* pred_row =
1080 transform->data_ + (y >> transform->bits_) * tiles_per_row;
1081
1082 while (y < y_end) {
1083 const uint32_t* pred = pred_row;
1084 Multipliers m = { 0, 0, 0 };
1085 int x;
1086
1087 for (x = 0; x < width; ++x) {
1088 if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m);
1089 data[x] = TransformColor(&m, data[x], 1);
1090 }
1091 data += width;
1092 ++y;
1093 if ((y & mask) == 0) pred_row += tiles_per_row;;
1094 }
1095 }
1096
1097 // Separate out pixels packed together using pixel-bundling.
1098 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
1099 #define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
1100 void FUNC_NAME(const VP8LTransform* const transform, \
1101 int y_start, int y_end, const TYPE* src, TYPE* dst) { \
1102 int y; \
1103 const int bits_per_pixel = 8 >> transform->bits_; \
1104 const int width = transform->xsize_; \
1105 const uint32_t* const color_map = transform->data_; \
1106 if (bits_per_pixel < 8) { \
1107 const int pixels_per_byte = 1 << transform->bits_; \
1108 const int count_mask = pixels_per_byte - 1; \
1109 const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \
1110 for (y = y_start; y < y_end; ++y) { \
1111 uint32_t packed_pixels = 0; \
1112 int x; \
1113 for (x = 0; x < width; ++x) { \
1114 /* We need to load fresh 'packed_pixels' once every */ \
1115 /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \
1116 /* is a power of 2, so can just use a mask for that, instead of */ \
1117 /* decrementing a counter. */ \
1118 if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \
1119 *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \
1120 packed_pixels >>= bits_per_pixel; \
1121 } \
1122 } \
1123 } else { \
1124 for (y = y_start; y < y_end; ++y) { \
1125 int x; \
1126 for (x = 0; x < width; ++x) { \
1127 *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
1128 } \
1129 } \
1130 } \
1131 }
1132
GetARGBIndex(uint32_t idx)1133 static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
1134 return (idx >> 8) & 0xff;
1135 }
1136
GetAlphaIndex(uint8_t idx)1137 static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
1138 return idx;
1139 }
1140
GetARGBValue(uint32_t val)1141 static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
1142 return val;
1143 }
1144
GetAlphaValue(uint32_t val)1145 static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
1146 return (val >> 8) & 0xff;
1147 }
1148
COLOR_INDEX_INVERSE(ColorIndexInverseTransform,uint32_t,GetARGBIndex,GetARGBValue)1149 static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
1150 GetARGBValue)
1151 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
1152 GetAlphaValue)
1153
1154 #undef COLOR_INDEX_INVERSE
1155
1156 void VP8LInverseTransform(const VP8LTransform* const transform,
1157 int row_start, int row_end,
1158 const uint32_t* const in, uint32_t* const out) {
1159 assert(row_start < row_end);
1160 assert(row_end <= transform->ysize_);
1161 switch (transform->type_) {
1162 case SUBTRACT_GREEN:
1163 AddGreenToBlueAndRed(transform, row_start, row_end, out);
1164 break;
1165 case PREDICTOR_TRANSFORM:
1166 PredictorInverseTransform(transform, row_start, row_end, out);
1167 if (row_end != transform->ysize_) {
1168 // The last predicted row in this iteration will be the top-pred row
1169 // for the first row in next iteration.
1170 const int width = transform->xsize_;
1171 memcpy(out - width, out + (row_end - row_start - 1) * width,
1172 width * sizeof(*out));
1173 }
1174 break;
1175 case CROSS_COLOR_TRANSFORM:
1176 ColorSpaceInverseTransform(transform, row_start, row_end, out);
1177 break;
1178 case COLOR_INDEXING_TRANSFORM:
1179 if (in == out && transform->bits_ > 0) {
1180 // Move packed pixels to the end of unpacked region, so that unpacking
1181 // can occur seamlessly.
1182 // Also, note that this is the only transform that applies on
1183 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1184 // transforms work on effective width of xsize_.
1185 const int out_stride = (row_end - row_start) * transform->xsize_;
1186 const int in_stride = (row_end - row_start) *
1187 VP8LSubSampleSize(transform->xsize_, transform->bits_);
1188 uint32_t* const src = out + out_stride - in_stride;
1189 memmove(src, out, in_stride * sizeof(*src));
1190 ColorIndexInverseTransform(transform, row_start, row_end, src, out);
1191 } else {
1192 ColorIndexInverseTransform(transform, row_start, row_end, in, out);
1193 }
1194 break;
1195 }
1196 }
1197
1198 //------------------------------------------------------------------------------
1199 // Color space conversion.
1200
is_big_endian(void)1201 static int is_big_endian(void) {
1202 static const union {
1203 uint16_t w;
1204 uint8_t b[2];
1205 } tmp = { 1 };
1206 return (tmp.b[0] != 1);
1207 }
1208
ConvertBGRAToRGB(const uint32_t * src,int num_pixels,uint8_t * dst)1209 static void ConvertBGRAToRGB(const uint32_t* src,
1210 int num_pixels, uint8_t* dst) {
1211 const uint32_t* const src_end = src + num_pixels;
1212 while (src < src_end) {
1213 const uint32_t argb = *src++;
1214 *dst++ = (argb >> 16) & 0xff;
1215 *dst++ = (argb >> 8) & 0xff;
1216 *dst++ = (argb >> 0) & 0xff;
1217 }
1218 }
1219
ConvertBGRAToRGBA(const uint32_t * src,int num_pixels,uint8_t * dst)1220 static void ConvertBGRAToRGBA(const uint32_t* src,
1221 int num_pixels, uint8_t* dst) {
1222 const uint32_t* const src_end = src + num_pixels;
1223 while (src < src_end) {
1224 const uint32_t argb = *src++;
1225 *dst++ = (argb >> 16) & 0xff;
1226 *dst++ = (argb >> 8) & 0xff;
1227 *dst++ = (argb >> 0) & 0xff;
1228 *dst++ = (argb >> 24) & 0xff;
1229 }
1230 }
1231
ConvertBGRAToRGBA4444(const uint32_t * src,int num_pixels,uint8_t * dst)1232 static void ConvertBGRAToRGBA4444(const uint32_t* src,
1233 int num_pixels, uint8_t* dst) {
1234 const uint32_t* const src_end = src + num_pixels;
1235 while (src < src_end) {
1236 const uint32_t argb = *src++;
1237 const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
1238 const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
1239 #ifdef WEBP_SWAP_16BIT_CSP
1240 *dst++ = ba;
1241 *dst++ = rg;
1242 #else
1243 *dst++ = rg;
1244 *dst++ = ba;
1245 #endif
1246 }
1247 }
1248
ConvertBGRAToRGB565(const uint32_t * src,int num_pixels,uint8_t * dst)1249 static void ConvertBGRAToRGB565(const uint32_t* src,
1250 int num_pixels, uint8_t* dst) {
1251 const uint32_t* const src_end = src + num_pixels;
1252 while (src < src_end) {
1253 const uint32_t argb = *src++;
1254 const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
1255 const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
1256 #ifdef WEBP_SWAP_16BIT_CSP
1257 *dst++ = gb;
1258 *dst++ = rg;
1259 #else
1260 *dst++ = rg;
1261 *dst++ = gb;
1262 #endif
1263 }
1264 }
1265
ConvertBGRAToBGR(const uint32_t * src,int num_pixels,uint8_t * dst)1266 static void ConvertBGRAToBGR(const uint32_t* src,
1267 int num_pixels, uint8_t* dst) {
1268 const uint32_t* const src_end = src + num_pixels;
1269 while (src < src_end) {
1270 const uint32_t argb = *src++;
1271 *dst++ = (argb >> 0) & 0xff;
1272 *dst++ = (argb >> 8) & 0xff;
1273 *dst++ = (argb >> 16) & 0xff;
1274 }
1275 }
1276
CopyOrSwap(const uint32_t * src,int num_pixels,uint8_t * dst,int swap_on_big_endian)1277 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
1278 int swap_on_big_endian) {
1279 if (is_big_endian() == swap_on_big_endian) {
1280 const uint32_t* const src_end = src + num_pixels;
1281 while (src < src_end) {
1282 uint32_t argb = *src++;
1283
1284 #if !defined(__BIG_ENDIAN__)
1285 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
1286 #if defined(__i386__) || defined(__x86_64__)
1287 __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
1288 *(uint32_t*)dst = argb;
1289 #elif defined(_MSC_VER)
1290 argb = _byteswap_ulong(argb);
1291 *(uint32_t*)dst = argb;
1292 #else
1293 dst[0] = (argb >> 24) & 0xff;
1294 dst[1] = (argb >> 16) & 0xff;
1295 dst[2] = (argb >> 8) & 0xff;
1296 dst[3] = (argb >> 0) & 0xff;
1297 #endif
1298 #else // WEBP_REFERENCE_IMPLEMENTATION
1299 dst[0] = (argb >> 24) & 0xff;
1300 dst[1] = (argb >> 16) & 0xff;
1301 dst[2] = (argb >> 8) & 0xff;
1302 dst[3] = (argb >> 0) & 0xff;
1303 #endif
1304 #else // __BIG_ENDIAN__
1305 dst[0] = (argb >> 0) & 0xff;
1306 dst[1] = (argb >> 8) & 0xff;
1307 dst[2] = (argb >> 16) & 0xff;
1308 dst[3] = (argb >> 24) & 0xff;
1309 #endif
1310 dst += sizeof(argb);
1311 }
1312 } else {
1313 memcpy(dst, src, num_pixels * sizeof(*src));
1314 }
1315 }
1316
VP8LConvertFromBGRA(const uint32_t * const in_data,int num_pixels,WEBP_CSP_MODE out_colorspace,uint8_t * const rgba)1317 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
1318 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
1319 switch (out_colorspace) {
1320 case MODE_RGB:
1321 ConvertBGRAToRGB(in_data, num_pixels, rgba);
1322 break;
1323 case MODE_RGBA:
1324 ConvertBGRAToRGBA(in_data, num_pixels, rgba);
1325 break;
1326 case MODE_rgbA:
1327 ConvertBGRAToRGBA(in_data, num_pixels, rgba);
1328 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1329 break;
1330 case MODE_BGR:
1331 ConvertBGRAToBGR(in_data, num_pixels, rgba);
1332 break;
1333 case MODE_BGRA:
1334 CopyOrSwap(in_data, num_pixels, rgba, 1);
1335 break;
1336 case MODE_bgrA:
1337 CopyOrSwap(in_data, num_pixels, rgba, 1);
1338 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1339 break;
1340 case MODE_ARGB:
1341 CopyOrSwap(in_data, num_pixels, rgba, 0);
1342 break;
1343 case MODE_Argb:
1344 CopyOrSwap(in_data, num_pixels, rgba, 0);
1345 WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
1346 break;
1347 case MODE_RGBA_4444:
1348 ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1349 break;
1350 case MODE_rgbA_4444:
1351 ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1352 WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
1353 break;
1354 case MODE_RGB_565:
1355 ConvertBGRAToRGB565(in_data, num_pixels, rgba);
1356 break;
1357 default:
1358 assert(0); // Code flow should not reach here.
1359 }
1360 }
1361
1362 // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
VP8LBundleColorMap(const uint8_t * const row,int width,int xbits,uint32_t * const dst)1363 void VP8LBundleColorMap(const uint8_t* const row, int width,
1364 int xbits, uint32_t* const dst) {
1365 int x;
1366 if (xbits > 0) {
1367 const int bit_depth = 1 << (3 - xbits);
1368 const int mask = (1 << xbits) - 1;
1369 uint32_t code = 0xff000000;
1370 for (x = 0; x < width; ++x) {
1371 const int xsub = x & mask;
1372 if (xsub == 0) {
1373 code = 0xff000000;
1374 }
1375 code |= row[x] << (8 + bit_depth * xsub);
1376 dst[x >> xbits] = code;
1377 }
1378 } else {
1379 for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
1380 }
1381 }
1382
1383 //------------------------------------------------------------------------------
1384
1385 #if defined(__cplusplus) || defined(c_plusplus)
1386 } // extern "C"
1387 #endif
1388