1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Image transforms and color space conversion methods for lossless decoder.
11 //
12 // Authors: Vikas Arora (vikaas.arora@gmail.com)
13 // Jyrki Alakuijala (jyrki@google.com)
14 // Urvang Joshi (urvang@google.com)
15
16 #include "./dsp.h"
17
18 #include <math.h>
19 #include <stdlib.h>
20 #include "../dec/vp8li.h"
21 #include "../utils/endian_inl.h"
22 #include "./lossless.h"
23 #include "./yuv.h"
24
25 #define MAX_DIFF_COST (1e30f)
26
27 // lookup table for small values of log2(int)
28 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
29 0.0000000000000000f, 0.0000000000000000f,
30 1.0000000000000000f, 1.5849625007211560f,
31 2.0000000000000000f, 2.3219280948873621f,
32 2.5849625007211560f, 2.8073549220576041f,
33 3.0000000000000000f, 3.1699250014423121f,
34 3.3219280948873621f, 3.4594316186372973f,
35 3.5849625007211560f, 3.7004397181410921f,
36 3.8073549220576041f, 3.9068905956085187f,
37 4.0000000000000000f, 4.0874628412503390f,
38 4.1699250014423121f, 4.2479275134435852f,
39 4.3219280948873626f, 4.3923174227787606f,
40 4.4594316186372973f, 4.5235619560570130f,
41 4.5849625007211560f, 4.6438561897747243f,
42 4.7004397181410917f, 4.7548875021634682f,
43 4.8073549220576037f, 4.8579809951275718f,
44 4.9068905956085187f, 4.9541963103868749f,
45 5.0000000000000000f, 5.0443941193584533f,
46 5.0874628412503390f, 5.1292830169449663f,
47 5.1699250014423121f, 5.2094533656289501f,
48 5.2479275134435852f, 5.2854022188622487f,
49 5.3219280948873626f, 5.3575520046180837f,
50 5.3923174227787606f, 5.4262647547020979f,
51 5.4594316186372973f, 5.4918530963296747f,
52 5.5235619560570130f, 5.5545888516776376f,
53 5.5849625007211560f, 5.6147098441152083f,
54 5.6438561897747243f, 5.6724253419714951f,
55 5.7004397181410917f, 5.7279204545631987f,
56 5.7548875021634682f, 5.7813597135246599f,
57 5.8073549220576037f, 5.8328900141647412f,
58 5.8579809951275718f, 5.8826430493618415f,
59 5.9068905956085187f, 5.9307373375628866f,
60 5.9541963103868749f, 5.9772799234999167f,
61 6.0000000000000000f, 6.0223678130284543f,
62 6.0443941193584533f, 6.0660891904577720f,
63 6.0874628412503390f, 6.1085244567781691f,
64 6.1292830169449663f, 6.1497471195046822f,
65 6.1699250014423121f, 6.1898245588800175f,
66 6.2094533656289501f, 6.2288186904958804f,
67 6.2479275134435852f, 6.2667865406949010f,
68 6.2854022188622487f, 6.3037807481771030f,
69 6.3219280948873626f, 6.3398500028846243f,
70 6.3575520046180837f, 6.3750394313469245f,
71 6.3923174227787606f, 6.4093909361377017f,
72 6.4262647547020979f, 6.4429434958487279f,
73 6.4594316186372973f, 6.4757334309663976f,
74 6.4918530963296747f, 6.5077946401986963f,
75 6.5235619560570130f, 6.5391588111080309f,
76 6.5545888516776376f, 6.5698556083309478f,
77 6.5849625007211560f, 6.5999128421871278f,
78 6.6147098441152083f, 6.6293566200796094f,
79 6.6438561897747243f, 6.6582114827517946f,
80 6.6724253419714951f, 6.6865005271832185f,
81 6.7004397181410917f, 6.7142455176661224f,
82 6.7279204545631987f, 6.7414669864011464f,
83 6.7548875021634682f, 6.7681843247769259f,
84 6.7813597135246599f, 6.7944158663501061f,
85 6.8073549220576037f, 6.8201789624151878f,
86 6.8328900141647412f, 6.8454900509443747f,
87 6.8579809951275718f, 6.8703647195834047f,
88 6.8826430493618415f, 6.8948177633079437f,
89 6.9068905956085187f, 6.9188632372745946f,
90 6.9307373375628866f, 6.9425145053392398f,
91 6.9541963103868749f, 6.9657842846620869f,
92 6.9772799234999167f, 6.9886846867721654f,
93 7.0000000000000000f, 7.0112272554232539f,
94 7.0223678130284543f, 7.0334230015374501f,
95 7.0443941193584533f, 7.0552824355011898f,
96 7.0660891904577720f, 7.0768155970508308f,
97 7.0874628412503390f, 7.0980320829605263f,
98 7.1085244567781691f, 7.1189410727235076f,
99 7.1292830169449663f, 7.1395513523987936f,
100 7.1497471195046822f, 7.1598713367783890f,
101 7.1699250014423121f, 7.1799090900149344f,
102 7.1898245588800175f, 7.1996723448363644f,
103 7.2094533656289501f, 7.2191685204621611f,
104 7.2288186904958804f, 7.2384047393250785f,
105 7.2479275134435852f, 7.2573878426926521f,
106 7.2667865406949010f, 7.2761244052742375f,
107 7.2854022188622487f, 7.2946207488916270f,
108 7.3037807481771030f, 7.3128829552843557f,
109 7.3219280948873626f, 7.3309168781146167f,
110 7.3398500028846243f, 7.3487281542310771f,
111 7.3575520046180837f, 7.3663222142458160f,
112 7.3750394313469245f, 7.3837042924740519f,
113 7.3923174227787606f, 7.4008794362821843f,
114 7.4093909361377017f, 7.4178525148858982f,
115 7.4262647547020979f, 7.4346282276367245f,
116 7.4429434958487279f, 7.4512111118323289f,
117 7.4594316186372973f, 7.4676055500829976f,
118 7.4757334309663976f, 7.4838157772642563f,
119 7.4918530963296747f, 7.4998458870832056f,
120 7.5077946401986963f, 7.5156998382840427f,
121 7.5235619560570130f, 7.5313814605163118f,
122 7.5391588111080309f, 7.5468944598876364f,
123 7.5545888516776376f, 7.5622424242210728f,
124 7.5698556083309478f, 7.5774288280357486f,
125 7.5849625007211560f, 7.5924570372680806f,
126 7.5999128421871278f, 7.6073303137496104f,
127 7.6147098441152083f, 7.6220518194563764f,
128 7.6293566200796094f, 7.6366246205436487f,
129 7.6438561897747243f, 7.6510516911789281f,
130 7.6582114827517946f, 7.6653359171851764f,
131 7.6724253419714951f, 7.6794800995054464f,
132 7.6865005271832185f, 7.6934869574993252f,
133 7.7004397181410917f, 7.7073591320808825f,
134 7.7142455176661224f, 7.7210991887071855f,
135 7.7279204545631987f, 7.7347096202258383f,
136 7.7414669864011464f, 7.7481928495894605f,
137 7.7548875021634682f, 7.7615512324444795f,
138 7.7681843247769259f, 7.7747870596011736f,
139 7.7813597135246599f, 7.7879025593914317f,
140 7.7944158663501061f, 7.8008998999203047f,
141 7.8073549220576037f, 7.8137811912170374f,
142 7.8201789624151878f, 7.8265484872909150f,
143 7.8328900141647412f, 7.8392037880969436f,
144 7.8454900509443747f, 7.8517490414160571f,
145 7.8579809951275718f, 7.8641861446542797f,
146 7.8703647195834047f, 7.8765169465649993f,
147 7.8826430493618415f, 7.8887432488982591f,
148 7.8948177633079437f, 7.9008668079807486f,
149 7.9068905956085187f, 7.9128893362299619f,
150 7.9188632372745946f, 7.9248125036057812f,
151 7.9307373375628866f, 7.9366379390025709f,
152 7.9425145053392398f, 7.9483672315846778f,
153 7.9541963103868749f, 7.9600019320680805f,
154 7.9657842846620869f, 7.9715435539507719f,
155 7.9772799234999167f, 7.9829935746943103f,
156 7.9886846867721654f, 7.9943534368588577f
157 };
158
159 const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
160 0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f,
161 8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f,
162 24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f,
163 43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f,
164 64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f,
165 86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f,
166 110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
167 134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
168 160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
169 186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
170 212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
171 240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
172 268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
173 296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
174 325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
175 354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
176 384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
177 413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
178 444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
179 474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
180 505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
181 536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
182 568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
183 600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
184 632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
185 664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
186 696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
187 729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
188 762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
189 795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
190 828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
191 862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
192 896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
193 929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
194 963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
195 998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
196 1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
197 1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
198 1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
199 1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
200 1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
201 1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
202 1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
203 1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
204 1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
205 1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
206 1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
207 1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
208 1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
209 1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
210 1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
211 1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
212 1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
213 1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
214 1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
215 1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
216 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
217 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
218 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
219 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
220 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
221 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
222 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
223 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
224 };
225
226 const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
227 { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1},
228 { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2},
229 { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3},
230 { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3},
231 { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
232 {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
233 {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
234 {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
235 {11, 4}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
236 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
237 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
238 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
239 {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
240 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
241 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
242 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
243 {13, 5}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
244 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
245 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
246 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
247 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
248 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
249 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
250 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
251 {14, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
252 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
253 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
254 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
255 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
256 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
257 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
258 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
259 {15, 6}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
260 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
261 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
262 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
263 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
264 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
265 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
266 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
267 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
268 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
269 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
270 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
271 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
272 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
273 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
274 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
275 {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
276 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
277 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
278 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
279 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
280 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
281 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
282 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
283 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
284 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
285 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
286 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
287 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
288 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
289 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
290 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
291 };
292
293 const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
294 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3,
295 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
296 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
297 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
298 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
299 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
300 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
301 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
302 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
303 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
304 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
305 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
306 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
307 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
308 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
309 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
310 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
311 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
312 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
313 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
314 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
315 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
316 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
317 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
318 127,
319 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
320 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
321 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
322 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
323 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
324 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
325 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
326 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
327 };
328
329 // The threshold till approximate version of log_2 can be used.
330 // Practically, we can get rid of the call to log() as the two values match to
331 // very high degree (the ratio of these two is 0.99999x).
332 // Keeping a high threshold for now.
333 #define APPROX_LOG_WITH_CORRECTION_MAX 65536
334 #define APPROX_LOG_MAX 4096
335 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
FastSLog2Slow(uint32_t v)336 static float FastSLog2Slow(uint32_t v) {
337 assert(v >= LOG_LOOKUP_IDX_MAX);
338 if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
339 int log_cnt = 0;
340 uint32_t y = 1;
341 int correction = 0;
342 const float v_f = (float)v;
343 const uint32_t orig_v = v;
344 do {
345 ++log_cnt;
346 v = v >> 1;
347 y = y << 1;
348 } while (v >= LOG_LOOKUP_IDX_MAX);
349 // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
350 // Xf = floor(Xf) * (1 + (v % y) / v)
351 // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
352 // The correction factor: log(1 + d) ~ d; for very small d values, so
353 // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
354 // LOG_2_RECIPROCAL ~ 23/16
355 correction = (23 * (orig_v & (y - 1))) >> 4;
356 return v_f * (kLog2Table[v] + log_cnt) + correction;
357 } else {
358 return (float)(LOG_2_RECIPROCAL * v * log((double)v));
359 }
360 }
361
FastLog2Slow(uint32_t v)362 static float FastLog2Slow(uint32_t v) {
363 assert(v >= LOG_LOOKUP_IDX_MAX);
364 if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
365 int log_cnt = 0;
366 uint32_t y = 1;
367 const uint32_t orig_v = v;
368 double log_2;
369 do {
370 ++log_cnt;
371 v = v >> 1;
372 y = y << 1;
373 } while (v >= LOG_LOOKUP_IDX_MAX);
374 log_2 = kLog2Table[v] + log_cnt;
375 if (orig_v >= APPROX_LOG_MAX) {
376 // Since the division is still expensive, add this correction factor only
377 // for large values of 'v'.
378 const int correction = (23 * (orig_v & (y - 1))) >> 4;
379 log_2 += (double)correction / orig_v;
380 }
381 return (float)log_2;
382 } else {
383 return (float)(LOG_2_RECIPROCAL * log((double)v));
384 }
385 }
386
387 //------------------------------------------------------------------------------
388 // Image transforms.
389
390 // Mostly used to reduce code size + readability
GetMin(int a,int b)391 static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
392
393 // In-place sum of each component with mod 256.
AddPixelsEq(uint32_t * a,uint32_t b)394 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
395 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
396 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
397 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
398 }
399
Average2(uint32_t a0,uint32_t a1)400 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
401 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
402 }
403
Average3(uint32_t a0,uint32_t a1,uint32_t a2)404 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
405 return Average2(Average2(a0, a2), a1);
406 }
407
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)408 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
409 uint32_t a2, uint32_t a3) {
410 return Average2(Average2(a0, a1), Average2(a2, a3));
411 }
412
Clip255(uint32_t a)413 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
414 if (a < 256) {
415 return a;
416 }
417 // return 0, when a is a negative integer.
418 // return 255, when a is positive.
419 return ~a >> 24;
420 }
421
AddSubtractComponentFull(int a,int b,int c)422 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
423 return Clip255(a + b - c);
424 }
425
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)426 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
427 uint32_t c2) {
428 const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
429 const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
430 (c1 >> 16) & 0xff,
431 (c2 >> 16) & 0xff);
432 const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
433 (c1 >> 8) & 0xff,
434 (c2 >> 8) & 0xff);
435 const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
436 return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
437 }
438
AddSubtractComponentHalf(int a,int b)439 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
440 return Clip255(a + (a - b) / 2);
441 }
442
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)443 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
444 uint32_t c2) {
445 const uint32_t ave = Average2(c0, c1);
446 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
447 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
448 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
449 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
450 return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
451 }
452
453 // gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
454 #if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
455 # define LOCAL_INLINE __attribute__ ((noinline))
456 #else
457 # define LOCAL_INLINE WEBP_INLINE
458 #endif
459
Sub3(int a,int b,int c)460 static LOCAL_INLINE int Sub3(int a, int b, int c) {
461 const int pb = b - c;
462 const int pa = a - c;
463 return abs(pb) - abs(pa);
464 }
465
466 #undef LOCAL_INLINE
467
Select(uint32_t a,uint32_t b,uint32_t c)468 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
469 const int pa_minus_pb =
470 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
471 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
472 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
473 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
474 return (pa_minus_pb <= 0) ? a : b;
475 }
476
477 //------------------------------------------------------------------------------
478 // Predictors
479
Predictor0(uint32_t left,const uint32_t * const top)480 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
481 (void)top;
482 (void)left;
483 return ARGB_BLACK;
484 }
Predictor1(uint32_t left,const uint32_t * const top)485 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
486 (void)top;
487 return left;
488 }
Predictor2(uint32_t left,const uint32_t * const top)489 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
490 (void)left;
491 return top[0];
492 }
Predictor3(uint32_t left,const uint32_t * const top)493 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
494 (void)left;
495 return top[1];
496 }
Predictor4(uint32_t left,const uint32_t * const top)497 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
498 (void)left;
499 return top[-1];
500 }
Predictor5(uint32_t left,const uint32_t * const top)501 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
502 const uint32_t pred = Average3(left, top[0], top[1]);
503 return pred;
504 }
Predictor6(uint32_t left,const uint32_t * const top)505 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
506 const uint32_t pred = Average2(left, top[-1]);
507 return pred;
508 }
Predictor7(uint32_t left,const uint32_t * const top)509 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
510 const uint32_t pred = Average2(left, top[0]);
511 return pred;
512 }
Predictor8(uint32_t left,const uint32_t * const top)513 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
514 const uint32_t pred = Average2(top[-1], top[0]);
515 (void)left;
516 return pred;
517 }
Predictor9(uint32_t left,const uint32_t * const top)518 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
519 const uint32_t pred = Average2(top[0], top[1]);
520 (void)left;
521 return pred;
522 }
Predictor10(uint32_t left,const uint32_t * const top)523 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
524 const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
525 return pred;
526 }
Predictor11(uint32_t left,const uint32_t * const top)527 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
528 const uint32_t pred = Select(top[0], left, top[-1]);
529 return pred;
530 }
Predictor12(uint32_t left,const uint32_t * const top)531 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
532 const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
533 return pred;
534 }
Predictor13(uint32_t left,const uint32_t * const top)535 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
536 const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
537 return pred;
538 }
539
540 static const VP8LPredictorFunc kPredictorsC[16] = {
541 Predictor0, Predictor1, Predictor2, Predictor3,
542 Predictor4, Predictor5, Predictor6, Predictor7,
543 Predictor8, Predictor9, Predictor10, Predictor11,
544 Predictor12, Predictor13,
545 Predictor0, Predictor0 // <- padding security sentinels
546 };
547
PredictionCostSpatial(const int counts[256],int weight_0,double exp_val)548 static float PredictionCostSpatial(const int counts[256], int weight_0,
549 double exp_val) {
550 const int significant_symbols = 256 >> 4;
551 const double exp_decay_factor = 0.6;
552 double bits = weight_0 * counts[0];
553 int i;
554 for (i = 1; i < significant_symbols; ++i) {
555 bits += exp_val * (counts[i] + counts[256 - i]);
556 exp_val *= exp_decay_factor;
557 }
558 return (float)(-0.1 * bits);
559 }
560
561 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
CombinedShannonEntropy(const int X[256],const int Y[256])562 static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
563 int i;
564 double retval = 0.;
565 int sumX = 0, sumXY = 0;
566 for (i = 0; i < 256; ++i) {
567 const int x = X[i];
568 const int xy = x + Y[i];
569 if (x != 0) {
570 sumX += x;
571 retval -= VP8LFastSLog2(x);
572 sumXY += xy;
573 retval -= VP8LFastSLog2(xy);
574 } else if (xy != 0) {
575 sumXY += xy;
576 retval -= VP8LFastSLog2(xy);
577 }
578 }
579 retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
580 return (float)retval;
581 }
582
PredictionCostSpatialHistogram(const int accumulated[4][256],const int tile[4][256])583 static float PredictionCostSpatialHistogram(const int accumulated[4][256],
584 const int tile[4][256]) {
585 int i;
586 double retval = 0;
587 for (i = 0; i < 4; ++i) {
588 const double kExpValue = 0.94;
589 retval += PredictionCostSpatial(tile[i], 1, kExpValue);
590 retval += CombinedShannonEntropy(tile[i], accumulated[i]);
591 }
592 return (float)retval;
593 }
594
UpdateHisto(int histo_argb[4][256],uint32_t argb)595 static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
596 ++histo_argb[0][argb >> 24];
597 ++histo_argb[1][(argb >> 16) & 0xff];
598 ++histo_argb[2][(argb >> 8) & 0xff];
599 ++histo_argb[3][argb & 0xff];
600 }
601
GetBestPredictorForTile(int width,int height,int tile_x,int tile_y,int bits,const int accumulated[4][256],const uint32_t * const argb_scratch)602 static int GetBestPredictorForTile(int width, int height,
603 int tile_x, int tile_y, int bits,
604 const int accumulated[4][256],
605 const uint32_t* const argb_scratch) {
606 const int kNumPredModes = 14;
607 const int col_start = tile_x << bits;
608 const int row_start = tile_y << bits;
609 const int tile_size = 1 << bits;
610 const int max_y = GetMin(tile_size, height - row_start);
611 const int max_x = GetMin(tile_size, width - col_start);
612 float best_diff = MAX_DIFF_COST;
613 int best_mode = 0;
614 int mode;
615 for (mode = 0; mode < kNumPredModes; ++mode) {
616 const uint32_t* current_row = argb_scratch;
617 const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
618 float cur_diff;
619 int y;
620 int histo_argb[4][256];
621 memset(histo_argb, 0, sizeof(histo_argb));
622 for (y = 0; y < max_y; ++y) {
623 int x;
624 const int row = row_start + y;
625 const uint32_t* const upper_row = current_row;
626 current_row = upper_row + width;
627 for (x = 0; x < max_x; ++x) {
628 const int col = col_start + x;
629 uint32_t predict;
630 if (row == 0) {
631 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
632 } else if (col == 0) {
633 predict = upper_row[col]; // Top.
634 } else {
635 predict = pred_func(current_row[col - 1], upper_row + col);
636 }
637 UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));
638 }
639 }
640 cur_diff = PredictionCostSpatialHistogram(
641 accumulated, (const int (*)[256])histo_argb);
642 if (cur_diff < best_diff) {
643 best_diff = cur_diff;
644 best_mode = mode;
645 }
646 }
647
648 return best_mode;
649 }
650
CopyTileWithPrediction(int width,int height,int tile_x,int tile_y,int bits,int mode,const uint32_t * const argb_scratch,uint32_t * const argb)651 static void CopyTileWithPrediction(int width, int height,
652 int tile_x, int tile_y, int bits, int mode,
653 const uint32_t* const argb_scratch,
654 uint32_t* const argb) {
655 const int col_start = tile_x << bits;
656 const int row_start = tile_y << bits;
657 const int tile_size = 1 << bits;
658 const int max_y = GetMin(tile_size, height - row_start);
659 const int max_x = GetMin(tile_size, width - col_start);
660 const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
661 const uint32_t* current_row = argb_scratch;
662
663 int y;
664 for (y = 0; y < max_y; ++y) {
665 int x;
666 const int row = row_start + y;
667 const uint32_t* const upper_row = current_row;
668 current_row = upper_row + width;
669 for (x = 0; x < max_x; ++x) {
670 const int col = col_start + x;
671 const int pix = row * width + col;
672 uint32_t predict;
673 if (row == 0) {
674 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
675 } else if (col == 0) {
676 predict = upper_row[col]; // Top.
677 } else {
678 predict = pred_func(current_row[col - 1], upper_row + col);
679 }
680 argb[pix] = VP8LSubPixels(current_row[col], predict);
681 }
682 }
683 }
684
VP8LResidualImage(int width,int height,int bits,uint32_t * const argb,uint32_t * const argb_scratch,uint32_t * const image)685 void VP8LResidualImage(int width, int height, int bits,
686 uint32_t* const argb, uint32_t* const argb_scratch,
687 uint32_t* const image) {
688 const int max_tile_size = 1 << bits;
689 const int tiles_per_row = VP8LSubSampleSize(width, bits);
690 const int tiles_per_col = VP8LSubSampleSize(height, bits);
691 uint32_t* const upper_row = argb_scratch;
692 uint32_t* const current_tile_rows = argb_scratch + width;
693 int tile_y;
694 int histo[4][256];
695 memset(histo, 0, sizeof(histo));
696 for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
697 const int tile_y_offset = tile_y * max_tile_size;
698 const int this_tile_height =
699 (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
700 int tile_x;
701 if (tile_y > 0) {
702 memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
703 width * sizeof(*upper_row));
704 }
705 memcpy(current_tile_rows, &argb[tile_y_offset * width],
706 this_tile_height * width * sizeof(*current_tile_rows));
707 for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
708 int pred;
709 int y;
710 const int tile_x_offset = tile_x * max_tile_size;
711 int all_x_max = tile_x_offset + max_tile_size;
712 if (all_x_max > width) {
713 all_x_max = width;
714 }
715 pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits,
716 (const int (*)[256])histo,
717 argb_scratch);
718 image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
719 CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
720 argb_scratch, argb);
721 for (y = 0; y < max_tile_size; ++y) {
722 int ix;
723 int all_x;
724 int all_y = tile_y_offset + y;
725 if (all_y >= height) {
726 break;
727 }
728 ix = all_y * width + tile_x_offset;
729 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
730 UpdateHisto(histo, argb[ix]);
731 }
732 }
733 }
734 }
735 }
736
737 // Inverse prediction.
PredictorInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)738 static void PredictorInverseTransform(const VP8LTransform* const transform,
739 int y_start, int y_end, uint32_t* data) {
740 const int width = transform->xsize_;
741 if (y_start == 0) { // First Row follows the L (mode=1) mode.
742 int x;
743 const uint32_t pred0 = Predictor0(data[-1], NULL);
744 AddPixelsEq(data, pred0);
745 for (x = 1; x < width; ++x) {
746 const uint32_t pred1 = Predictor1(data[x - 1], NULL);
747 AddPixelsEq(data + x, pred1);
748 }
749 data += width;
750 ++y_start;
751 }
752
753 {
754 int y = y_start;
755 const int tile_width = 1 << transform->bits_;
756 const int mask = tile_width - 1;
757 const int safe_width = width & ~mask;
758 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
759 const uint32_t* pred_mode_base =
760 transform->data_ + (y >> transform->bits_) * tiles_per_row;
761
762 while (y < y_end) {
763 const uint32_t pred2 = Predictor2(data[-1], data - width);
764 const uint32_t* pred_mode_src = pred_mode_base;
765 VP8LPredictorFunc pred_func;
766 int x = 1;
767 int t = 1;
768 // First pixel follows the T (mode=2) mode.
769 AddPixelsEq(data, pred2);
770 // .. the rest:
771 while (x < safe_width) {
772 pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
773 for (; t < tile_width; ++t, ++x) {
774 const uint32_t pred = pred_func(data[x - 1], data + x - width);
775 AddPixelsEq(data + x, pred);
776 }
777 t = 0;
778 }
779 if (x < width) {
780 pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
781 for (; x < width; ++x) {
782 const uint32_t pred = pred_func(data[x - 1], data + x - width);
783 AddPixelsEq(data + x, pred);
784 }
785 }
786 data += width;
787 ++y;
788 if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
789 pred_mode_base += tiles_per_row;
790 }
791 }
792 }
793 }
794
VP8LSubtractGreenFromBlueAndRed_C(uint32_t * argb_data,int num_pixels)795 void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
796 int i;
797 for (i = 0; i < num_pixels; ++i) {
798 const uint32_t argb = argb_data[i];
799 const uint32_t green = (argb >> 8) & 0xff;
800 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
801 const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
802 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
803 }
804 }
805
806 // Add green to blue and red channels (i.e. perform the inverse transform of
807 // 'subtract green').
VP8LAddGreenToBlueAndRed_C(uint32_t * data,int num_pixels)808 void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
809 int i;
810 for (i = 0; i < num_pixels; ++i) {
811 const uint32_t argb = data[i];
812 const uint32_t green = ((argb >> 8) & 0xff);
813 uint32_t red_blue = (argb & 0x00ff00ffu);
814 red_blue += (green << 16) | green;
815 red_blue &= 0x00ff00ffu;
816 data[i] = (argb & 0xff00ff00u) | red_blue;
817 }
818 }
819
MultipliersClear(VP8LMultipliers * const m)820 static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
821 m->green_to_red_ = 0;
822 m->green_to_blue_ = 0;
823 m->red_to_blue_ = 0;
824 }
825
ColorTransformDelta(int8_t color_pred,int8_t color)826 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
827 int8_t color) {
828 return (uint32_t)((int)(color_pred) * color) >> 5;
829 }
830
ColorCodeToMultipliers(uint32_t color_code,VP8LMultipliers * const m)831 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
832 VP8LMultipliers* const m) {
833 m->green_to_red_ = (color_code >> 0) & 0xff;
834 m->green_to_blue_ = (color_code >> 8) & 0xff;
835 m->red_to_blue_ = (color_code >> 16) & 0xff;
836 }
837
MultipliersToColorCode(const VP8LMultipliers * const m)838 static WEBP_INLINE uint32_t MultipliersToColorCode(
839 const VP8LMultipliers* const m) {
840 return 0xff000000u |
841 ((uint32_t)(m->red_to_blue_) << 16) |
842 ((uint32_t)(m->green_to_blue_) << 8) |
843 m->green_to_red_;
844 }
845
VP8LTransformColor_C(const VP8LMultipliers * const m,uint32_t * data,int num_pixels)846 void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
847 int num_pixels) {
848 int i;
849 for (i = 0; i < num_pixels; ++i) {
850 const uint32_t argb = data[i];
851 const uint32_t green = argb >> 8;
852 const uint32_t red = argb >> 16;
853 uint32_t new_red = red;
854 uint32_t new_blue = argb;
855 new_red -= ColorTransformDelta(m->green_to_red_, green);
856 new_red &= 0xff;
857 new_blue -= ColorTransformDelta(m->green_to_blue_, green);
858 new_blue -= ColorTransformDelta(m->red_to_blue_, red);
859 new_blue &= 0xff;
860 data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
861 }
862 }
863
VP8LTransformColorInverse_C(const VP8LMultipliers * const m,uint32_t * data,int num_pixels)864 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
865 int num_pixels) {
866 int i;
867 for (i = 0; i < num_pixels; ++i) {
868 const uint32_t argb = data[i];
869 const uint32_t green = argb >> 8;
870 const uint32_t red = argb >> 16;
871 uint32_t new_red = red;
872 uint32_t new_blue = argb;
873 new_red += ColorTransformDelta(m->green_to_red_, green);
874 new_red &= 0xff;
875 new_blue += ColorTransformDelta(m->green_to_blue_, green);
876 new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
877 new_blue &= 0xff;
878 data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
879 }
880 }
881
TransformColorRed(uint8_t green_to_red,uint32_t argb)882 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
883 uint32_t argb) {
884 const uint32_t green = argb >> 8;
885 uint32_t new_red = argb >> 16;
886 new_red -= ColorTransformDelta(green_to_red, green);
887 return (new_red & 0xff);
888 }
889
TransformColorBlue(uint8_t green_to_blue,uint8_t red_to_blue,uint32_t argb)890 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
891 uint8_t red_to_blue,
892 uint32_t argb) {
893 const uint32_t green = argb >> 8;
894 const uint32_t red = argb >> 16;
895 uint8_t new_blue = argb;
896 new_blue -= ColorTransformDelta(green_to_blue, green);
897 new_blue -= ColorTransformDelta(red_to_blue, red);
898 return (new_blue & 0xff);
899 }
900
PredictionCostCrossColor(const int accumulated[256],const int counts[256])901 static float PredictionCostCrossColor(const int accumulated[256],
902 const int counts[256]) {
903 // Favor low entropy, locally and globally.
904 // Favor small absolute values for PredictionCostSpatial
905 static const double kExpValue = 2.4;
906 return CombinedShannonEntropy(counts, accumulated) +
907 PredictionCostSpatial(counts, 3, kExpValue);
908 }
909
GetPredictionCostCrossColorRed(int tile_x_offset,int tile_y_offset,int all_x_max,int all_y_max,int xsize,VP8LMultipliers prev_x,VP8LMultipliers prev_y,int green_to_red,const int accumulated_red_histo[256],const uint32_t * const argb)910 static float GetPredictionCostCrossColorRed(
911 int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
912 int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
913 const int accumulated_red_histo[256], const uint32_t* const argb) {
914 int all_y;
915 int histo[256] = { 0 };
916 float cur_diff;
917 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
918 int ix = all_y * xsize + tile_x_offset;
919 int all_x;
920 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
921 ++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
922 }
923 }
924 cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
925 if ((uint8_t)green_to_red == prev_x.green_to_red_) {
926 cur_diff -= 3; // favor keeping the areas locally similar
927 }
928 if ((uint8_t)green_to_red == prev_y.green_to_red_) {
929 cur_diff -= 3; // favor keeping the areas locally similar
930 }
931 if (green_to_red == 0) {
932 cur_diff -= 3;
933 }
934 return cur_diff;
935 }
936
GetBestGreenToRed(int tile_x_offset,int tile_y_offset,int all_x_max,int all_y_max,int xsize,VP8LMultipliers prev_x,VP8LMultipliers prev_y,const int accumulated_red_histo[256],const uint32_t * const argb,VP8LMultipliers * const best_tx)937 static void GetBestGreenToRed(
938 int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
939 int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
940 const int accumulated_red_histo[256], const uint32_t* const argb,
941 VP8LMultipliers* const best_tx) {
942 int min_green_to_red = -64;
943 int max_green_to_red = 64;
944 int green_to_red = 0;
945 int eval_min = 1;
946 int eval_max = 1;
947 float cur_diff_min = MAX_DIFF_COST;
948 float cur_diff_max = MAX_DIFF_COST;
949 // Do a binary search to find the optimal green_to_red color transform.
950 while (max_green_to_red - min_green_to_red > 2) {
951 if (eval_min) {
952 cur_diff_min = GetPredictionCostCrossColorRed(
953 tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
954 prev_x, prev_y, min_green_to_red, accumulated_red_histo, argb);
955 eval_min = 0;
956 }
957 if (eval_max) {
958 cur_diff_max = GetPredictionCostCrossColorRed(
959 tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
960 prev_x, prev_y, max_green_to_red, accumulated_red_histo, argb);
961 eval_max = 0;
962 }
963 if (cur_diff_min < cur_diff_max) {
964 green_to_red = min_green_to_red;
965 max_green_to_red = (max_green_to_red + min_green_to_red) / 2;
966 eval_max = 1;
967 } else {
968 green_to_red = max_green_to_red;
969 min_green_to_red = (max_green_to_red + min_green_to_red) / 2;
970 eval_min = 1;
971 }
972 }
973 best_tx->green_to_red_ = green_to_red;
974 }
975
GetPredictionCostCrossColorBlue(int tile_x_offset,int tile_y_offset,int all_x_max,int all_y_max,int xsize,VP8LMultipliers prev_x,VP8LMultipliers prev_y,int green_to_blue,int red_to_blue,const int accumulated_blue_histo[256],const uint32_t * const argb)976 static float GetPredictionCostCrossColorBlue(
977 int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
978 int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
979 int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
980 const uint32_t* const argb) {
981 int all_y;
982 int histo[256] = { 0 };
983 float cur_diff;
984 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
985 int all_x;
986 int ix = all_y * xsize + tile_x_offset;
987 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
988 ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
989 }
990 }
991 cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
992 if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
993 cur_diff -= 3; // favor keeping the areas locally similar
994 }
995 if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
996 cur_diff -= 3; // favor keeping the areas locally similar
997 }
998 if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
999 cur_diff -= 3; // favor keeping the areas locally similar
1000 }
1001 if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
1002 cur_diff -= 3; // favor keeping the areas locally similar
1003 }
1004 if (green_to_blue == 0) {
1005 cur_diff -= 3;
1006 }
1007 if (red_to_blue == 0) {
1008 cur_diff -= 3;
1009 }
1010 return cur_diff;
1011 }
1012
GetBestGreenRedToBlue(int tile_x_offset,int tile_y_offset,int all_x_max,int all_y_max,int xsize,VP8LMultipliers prev_x,VP8LMultipliers prev_y,int quality,const int accumulated_blue_histo[256],const uint32_t * const argb,VP8LMultipliers * const best_tx)1013 static void GetBestGreenRedToBlue(
1014 int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
1015 int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
1016 const int accumulated_blue_histo[256], const uint32_t* const argb,
1017 VP8LMultipliers* const best_tx) {
1018 float best_diff = MAX_DIFF_COST;
1019 float cur_diff;
1020 const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
1021 const int min_green_to_blue = -32;
1022 const int max_green_to_blue = 32;
1023 const int min_red_to_blue = -32;
1024 const int max_red_to_blue = 32;
1025 const int num_iters =
1026 (1 + (max_green_to_blue - min_green_to_blue) / step) *
1027 (1 + (max_red_to_blue - min_red_to_blue) / step);
1028 // Number of tries to get optimal green_to_blue & red_to_blue color transforms
1029 // after finding a local minima.
1030 const int max_tries_after_min = 4 + (num_iters >> 2);
1031 int num_tries_after_min = 0;
1032 int green_to_blue;
1033 for (green_to_blue = min_green_to_blue;
1034 green_to_blue <= max_green_to_blue &&
1035 num_tries_after_min < max_tries_after_min;
1036 green_to_blue += step) {
1037 int red_to_blue;
1038 for (red_to_blue = min_red_to_blue;
1039 red_to_blue <= max_red_to_blue &&
1040 num_tries_after_min < max_tries_after_min;
1041 red_to_blue += step) {
1042 cur_diff = GetPredictionCostCrossColorBlue(
1043 tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,
1044 prev_y, green_to_blue, red_to_blue, accumulated_blue_histo, argb);
1045 if (cur_diff < best_diff) {
1046 best_diff = cur_diff;
1047 best_tx->green_to_blue_ = green_to_blue;
1048 best_tx->red_to_blue_ = red_to_blue;
1049 num_tries_after_min = 0;
1050 } else {
1051 ++num_tries_after_min;
1052 }
1053 }
1054 }
1055 }
1056
GetBestColorTransformForTile(int tile_x,int tile_y,int bits,VP8LMultipliers prev_x,VP8LMultipliers prev_y,int quality,int xsize,int ysize,const int accumulated_red_histo[256],const int accumulated_blue_histo[256],const uint32_t * const argb)1057 static VP8LMultipliers GetBestColorTransformForTile(
1058 int tile_x, int tile_y, int bits,
1059 VP8LMultipliers prev_x,
1060 VP8LMultipliers prev_y,
1061 int quality, int xsize, int ysize,
1062 const int accumulated_red_histo[256],
1063 const int accumulated_blue_histo[256],
1064 const uint32_t* const argb) {
1065 const int max_tile_size = 1 << bits;
1066 const int tile_y_offset = tile_y * max_tile_size;
1067 const int tile_x_offset = tile_x * max_tile_size;
1068 const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
1069 const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
1070 VP8LMultipliers best_tx;
1071 MultipliersClear(&best_tx);
1072
1073 GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
1074 prev_x, prev_y, accumulated_red_histo, argb, &best_tx);
1075 GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max,
1076 xsize, prev_x, prev_y, quality, accumulated_blue_histo,
1077 argb, &best_tx);
1078 return best_tx;
1079 }
1080
CopyTileWithColorTransform(int xsize,int ysize,int tile_x,int tile_y,int max_tile_size,VP8LMultipliers color_transform,uint32_t * argb)1081 static void CopyTileWithColorTransform(int xsize, int ysize,
1082 int tile_x, int tile_y,
1083 int max_tile_size,
1084 VP8LMultipliers color_transform,
1085 uint32_t* argb) {
1086 const int xscan = GetMin(max_tile_size, xsize - tile_x);
1087 int yscan = GetMin(max_tile_size, ysize - tile_y);
1088 argb += tile_y * xsize + tile_x;
1089 while (yscan-- > 0) {
1090 VP8LTransformColor(&color_transform, argb, xscan);
1091 argb += xsize;
1092 }
1093 }
1094
VP8LColorSpaceTransform(int width,int height,int bits,int quality,uint32_t * const argb,uint32_t * image)1095 void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
1096 uint32_t* const argb, uint32_t* image) {
1097 const int max_tile_size = 1 << bits;
1098 const int tile_xsize = VP8LSubSampleSize(width, bits);
1099 const int tile_ysize = VP8LSubSampleSize(height, bits);
1100 int accumulated_red_histo[256] = { 0 };
1101 int accumulated_blue_histo[256] = { 0 };
1102 int tile_x, tile_y;
1103 VP8LMultipliers prev_x, prev_y;
1104 MultipliersClear(&prev_y);
1105 MultipliersClear(&prev_x);
1106 for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
1107 for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
1108 int y;
1109 const int tile_x_offset = tile_x * max_tile_size;
1110 const int tile_y_offset = tile_y * max_tile_size;
1111 const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);
1112 const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);
1113 const int offset = tile_y * tile_xsize + tile_x;
1114 if (tile_y != 0) {
1115 ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);
1116 }
1117 prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,
1118 prev_x, prev_y,
1119 quality, width, height,
1120 accumulated_red_histo,
1121 accumulated_blue_histo,
1122 argb);
1123 image[offset] = MultipliersToColorCode(&prev_x);
1124 CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
1125 max_tile_size, prev_x, argb);
1126
1127 // Gather accumulated histogram data.
1128 for (y = tile_y_offset; y < all_y_max; ++y) {
1129 int ix = y * width + tile_x_offset;
1130 const int ix_end = ix + all_x_max - tile_x_offset;
1131 for (; ix < ix_end; ++ix) {
1132 const uint32_t pix = argb[ix];
1133 if (ix >= 2 &&
1134 pix == argb[ix - 2] &&
1135 pix == argb[ix - 1]) {
1136 continue; // repeated pixels are handled by backward references
1137 }
1138 if (ix >= width + 2 &&
1139 argb[ix - 2] == argb[ix - width - 2] &&
1140 argb[ix - 1] == argb[ix - width - 1] &&
1141 pix == argb[ix - width]) {
1142 continue; // repeated pixels are handled by backward references
1143 }
1144 ++accumulated_red_histo[(pix >> 16) & 0xff];
1145 ++accumulated_blue_histo[(pix >> 0) & 0xff];
1146 }
1147 }
1148 }
1149 }
1150 }
1151
1152 // Color space inverse transform.
ColorSpaceInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)1153 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
1154 int y_start, int y_end, uint32_t* data) {
1155 const int width = transform->xsize_;
1156 const int tile_width = 1 << transform->bits_;
1157 const int mask = tile_width - 1;
1158 const int safe_width = width & ~mask;
1159 const int remaining_width = width - safe_width;
1160 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
1161 int y = y_start;
1162 const uint32_t* pred_row =
1163 transform->data_ + (y >> transform->bits_) * tiles_per_row;
1164
1165 while (y < y_end) {
1166 const uint32_t* pred = pred_row;
1167 VP8LMultipliers m = { 0, 0, 0 };
1168 const uint32_t* const data_safe_end = data + safe_width;
1169 const uint32_t* const data_end = data + width;
1170 while (data < data_safe_end) {
1171 ColorCodeToMultipliers(*pred++, &m);
1172 VP8LTransformColorInverse(&m, data, tile_width);
1173 data += tile_width;
1174 }
1175 if (data < data_end) { // Left-overs using C-version.
1176 ColorCodeToMultipliers(*pred++, &m);
1177 VP8LTransformColorInverse(&m, data, remaining_width);
1178 data += remaining_width;
1179 }
1180 ++y;
1181 if ((y & mask) == 0) pred_row += tiles_per_row;
1182 }
1183 }
1184
1185 // Separate out pixels packed together using pixel-bundling.
1186 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
1187 #define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
1188 void FUNC_NAME(const VP8LTransform* const transform, \
1189 int y_start, int y_end, const TYPE* src, TYPE* dst) { \
1190 int y; \
1191 const int bits_per_pixel = 8 >> transform->bits_; \
1192 const int width = transform->xsize_; \
1193 const uint32_t* const color_map = transform->data_; \
1194 if (bits_per_pixel < 8) { \
1195 const int pixels_per_byte = 1 << transform->bits_; \
1196 const int count_mask = pixels_per_byte - 1; \
1197 const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \
1198 for (y = y_start; y < y_end; ++y) { \
1199 uint32_t packed_pixels = 0; \
1200 int x; \
1201 for (x = 0; x < width; ++x) { \
1202 /* We need to load fresh 'packed_pixels' once every */ \
1203 /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \
1204 /* is a power of 2, so can just use a mask for that, instead of */ \
1205 /* decrementing a counter. */ \
1206 if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \
1207 *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \
1208 packed_pixels >>= bits_per_pixel; \
1209 } \
1210 } \
1211 } else { \
1212 for (y = y_start; y < y_end; ++y) { \
1213 int x; \
1214 for (x = 0; x < width; ++x) { \
1215 *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
1216 } \
1217 } \
1218 } \
1219 }
1220
GetARGBIndex(uint32_t idx)1221 static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
1222 return (idx >> 8) & 0xff;
1223 }
1224
GetAlphaIndex(uint8_t idx)1225 static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
1226 return idx;
1227 }
1228
GetARGBValue(uint32_t val)1229 static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
1230 return val;
1231 }
1232
GetAlphaValue(uint32_t val)1233 static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
1234 return (val >> 8) & 0xff;
1235 }
1236
COLOR_INDEX_INVERSE(ColorIndexInverseTransform,uint32_t,GetARGBIndex,GetARGBValue)1237 static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
1238 GetARGBValue)
1239 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
1240 GetAlphaValue)
1241
1242 #undef COLOR_INDEX_INVERSE
1243
1244 void VP8LInverseTransform(const VP8LTransform* const transform,
1245 int row_start, int row_end,
1246 const uint32_t* const in, uint32_t* const out) {
1247 const int width = transform->xsize_;
1248 assert(row_start < row_end);
1249 assert(row_end <= transform->ysize_);
1250 switch (transform->type_) {
1251 case SUBTRACT_GREEN:
1252 VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
1253 break;
1254 case PREDICTOR_TRANSFORM:
1255 PredictorInverseTransform(transform, row_start, row_end, out);
1256 if (row_end != transform->ysize_) {
1257 // The last predicted row in this iteration will be the top-pred row
1258 // for the first row in next iteration.
1259 memcpy(out - width, out + (row_end - row_start - 1) * width,
1260 width * sizeof(*out));
1261 }
1262 break;
1263 case CROSS_COLOR_TRANSFORM:
1264 ColorSpaceInverseTransform(transform, row_start, row_end, out);
1265 break;
1266 case COLOR_INDEXING_TRANSFORM:
1267 if (in == out && transform->bits_ > 0) {
1268 // Move packed pixels to the end of unpacked region, so that unpacking
1269 // can occur seamlessly.
1270 // Also, note that this is the only transform that applies on
1271 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1272 // transforms work on effective width of xsize_.
1273 const int out_stride = (row_end - row_start) * width;
1274 const int in_stride = (row_end - row_start) *
1275 VP8LSubSampleSize(transform->xsize_, transform->bits_);
1276 uint32_t* const src = out + out_stride - in_stride;
1277 memmove(src, out, in_stride * sizeof(*src));
1278 ColorIndexInverseTransform(transform, row_start, row_end, src, out);
1279 } else {
1280 ColorIndexInverseTransform(transform, row_start, row_end, in, out);
1281 }
1282 break;
1283 }
1284 }
1285
1286 //------------------------------------------------------------------------------
1287 // Color space conversion.
1288
is_big_endian(void)1289 static int is_big_endian(void) {
1290 static const union {
1291 uint16_t w;
1292 uint8_t b[2];
1293 } tmp = { 1 };
1294 return (tmp.b[0] != 1);
1295 }
1296
VP8LConvertBGRAToRGB_C(const uint32_t * src,int num_pixels,uint8_t * dst)1297 void VP8LConvertBGRAToRGB_C(const uint32_t* src,
1298 int num_pixels, uint8_t* dst) {
1299 const uint32_t* const src_end = src + num_pixels;
1300 while (src < src_end) {
1301 const uint32_t argb = *src++;
1302 *dst++ = (argb >> 16) & 0xff;
1303 *dst++ = (argb >> 8) & 0xff;
1304 *dst++ = (argb >> 0) & 0xff;
1305 }
1306 }
1307
VP8LConvertBGRAToRGBA_C(const uint32_t * src,int num_pixels,uint8_t * dst)1308 void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
1309 int num_pixels, uint8_t* dst) {
1310 const uint32_t* const src_end = src + num_pixels;
1311 while (src < src_end) {
1312 const uint32_t argb = *src++;
1313 *dst++ = (argb >> 16) & 0xff;
1314 *dst++ = (argb >> 8) & 0xff;
1315 *dst++ = (argb >> 0) & 0xff;
1316 *dst++ = (argb >> 24) & 0xff;
1317 }
1318 }
1319
VP8LConvertBGRAToRGBA4444_C(const uint32_t * src,int num_pixels,uint8_t * dst)1320 void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
1321 int num_pixels, uint8_t* dst) {
1322 const uint32_t* const src_end = src + num_pixels;
1323 while (src < src_end) {
1324 const uint32_t argb = *src++;
1325 const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
1326 const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
1327 #ifdef WEBP_SWAP_16BIT_CSP
1328 *dst++ = ba;
1329 *dst++ = rg;
1330 #else
1331 *dst++ = rg;
1332 *dst++ = ba;
1333 #endif
1334 }
1335 }
1336
VP8LConvertBGRAToRGB565_C(const uint32_t * src,int num_pixels,uint8_t * dst)1337 void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
1338 int num_pixels, uint8_t* dst) {
1339 const uint32_t* const src_end = src + num_pixels;
1340 while (src < src_end) {
1341 const uint32_t argb = *src++;
1342 const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
1343 const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
1344 #ifdef WEBP_SWAP_16BIT_CSP
1345 *dst++ = gb;
1346 *dst++ = rg;
1347 #else
1348 *dst++ = rg;
1349 *dst++ = gb;
1350 #endif
1351 }
1352 }
1353
VP8LConvertBGRAToBGR_C(const uint32_t * src,int num_pixels,uint8_t * dst)1354 void VP8LConvertBGRAToBGR_C(const uint32_t* src,
1355 int num_pixels, uint8_t* dst) {
1356 const uint32_t* const src_end = src + num_pixels;
1357 while (src < src_end) {
1358 const uint32_t argb = *src++;
1359 *dst++ = (argb >> 0) & 0xff;
1360 *dst++ = (argb >> 8) & 0xff;
1361 *dst++ = (argb >> 16) & 0xff;
1362 }
1363 }
1364
CopyOrSwap(const uint32_t * src,int num_pixels,uint8_t * dst,int swap_on_big_endian)1365 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
1366 int swap_on_big_endian) {
1367 if (is_big_endian() == swap_on_big_endian) {
1368 const uint32_t* const src_end = src + num_pixels;
1369 while (src < src_end) {
1370 const uint32_t argb = *src++;
1371
1372 #if !defined(WORDS_BIGENDIAN)
1373 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
1374 *(uint32_t*)dst = BSwap32(argb);
1375 #else // WEBP_REFERENCE_IMPLEMENTATION
1376 dst[0] = (argb >> 24) & 0xff;
1377 dst[1] = (argb >> 16) & 0xff;
1378 dst[2] = (argb >> 8) & 0xff;
1379 dst[3] = (argb >> 0) & 0xff;
1380 #endif
1381 #else // WORDS_BIGENDIAN
1382 dst[0] = (argb >> 0) & 0xff;
1383 dst[1] = (argb >> 8) & 0xff;
1384 dst[2] = (argb >> 16) & 0xff;
1385 dst[3] = (argb >> 24) & 0xff;
1386 #endif
1387 dst += sizeof(argb);
1388 }
1389 } else {
1390 memcpy(dst, src, num_pixels * sizeof(*src));
1391 }
1392 }
1393
VP8LConvertFromBGRA(const uint32_t * const in_data,int num_pixels,WEBP_CSP_MODE out_colorspace,uint8_t * const rgba)1394 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
1395 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
1396 switch (out_colorspace) {
1397 case MODE_RGB:
1398 VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
1399 break;
1400 case MODE_RGBA:
1401 VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
1402 break;
1403 case MODE_rgbA:
1404 VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
1405 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1406 break;
1407 case MODE_BGR:
1408 VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
1409 break;
1410 case MODE_BGRA:
1411 CopyOrSwap(in_data, num_pixels, rgba, 1);
1412 break;
1413 case MODE_bgrA:
1414 CopyOrSwap(in_data, num_pixels, rgba, 1);
1415 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1416 break;
1417 case MODE_ARGB:
1418 CopyOrSwap(in_data, num_pixels, rgba, 0);
1419 break;
1420 case MODE_Argb:
1421 CopyOrSwap(in_data, num_pixels, rgba, 0);
1422 WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
1423 break;
1424 case MODE_RGBA_4444:
1425 VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1426 break;
1427 case MODE_rgbA_4444:
1428 VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1429 WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
1430 break;
1431 case MODE_RGB_565:
1432 VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
1433 break;
1434 default:
1435 assert(0); // Code flow should not reach here.
1436 }
1437 }
1438
1439 //------------------------------------------------------------------------------
1440 // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
VP8LBundleColorMap(const uint8_t * const row,int width,int xbits,uint32_t * const dst)1441 void VP8LBundleColorMap(const uint8_t* const row, int width,
1442 int xbits, uint32_t* const dst) {
1443 int x;
1444 if (xbits > 0) {
1445 const int bit_depth = 1 << (3 - xbits);
1446 const int mask = (1 << xbits) - 1;
1447 uint32_t code = 0xff000000;
1448 for (x = 0; x < width; ++x) {
1449 const int xsub = x & mask;
1450 if (xsub == 0) {
1451 code = 0xff000000;
1452 }
1453 code |= row[x] << (8 + bit_depth * xsub);
1454 dst[x >> xbits] = code;
1455 }
1456 } else {
1457 for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
1458 }
1459 }
1460
1461 //------------------------------------------------------------------------------
1462
ExtraCost(const uint32_t * population,int length)1463 static double ExtraCost(const uint32_t* population, int length) {
1464 int i;
1465 double cost = 0.;
1466 for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
1467 return cost;
1468 }
1469
ExtraCostCombined(const uint32_t * X,const uint32_t * Y,int length)1470 static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
1471 int length) {
1472 int i;
1473 double cost = 0.;
1474 for (i = 2; i < length - 2; ++i) {
1475 const int xy = X[i + 2] + Y[i + 2];
1476 cost += (i >> 1) * xy;
1477 }
1478 return cost;
1479 }
1480
1481 // Returns the various RLE counts
HuffmanCostCount(const uint32_t * population,int length)1482 static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
1483 int i;
1484 int streak = 0;
1485 VP8LStreaks stats;
1486 memset(&stats, 0, sizeof(stats));
1487 for (i = 0; i < length - 1; ++i) {
1488 ++streak;
1489 if (population[i] == population[i + 1]) {
1490 continue;
1491 }
1492 stats.counts[population[i] != 0] += (streak > 3);
1493 stats.streaks[population[i] != 0][(streak > 3)] += streak;
1494 streak = 0;
1495 }
1496 ++streak;
1497 stats.counts[population[i] != 0] += (streak > 3);
1498 stats.streaks[population[i] != 0][(streak > 3)] += streak;
1499 return stats;
1500 }
1501
HuffmanCostCombinedCount(const uint32_t * X,const uint32_t * Y,int length)1502 static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
1503 const uint32_t* Y, int length) {
1504 int i;
1505 int streak = 0;
1506 VP8LStreaks stats;
1507 memset(&stats, 0, sizeof(stats));
1508 for (i = 0; i < length - 1; ++i) {
1509 const int xy = X[i] + Y[i];
1510 const int xy_next = X[i + 1] + Y[i + 1];
1511 ++streak;
1512 if (xy == xy_next) {
1513 continue;
1514 }
1515 stats.counts[xy != 0] += (streak > 3);
1516 stats.streaks[xy != 0][(streak > 3)] += streak;
1517 streak = 0;
1518 }
1519 {
1520 const int xy = X[i] + Y[i];
1521 ++streak;
1522 stats.counts[xy != 0] += (streak > 3);
1523 stats.streaks[xy != 0][(streak > 3)] += streak;
1524 }
1525 return stats;
1526 }
1527
1528 //------------------------------------------------------------------------------
1529
HistogramAdd(const VP8LHistogram * const a,const VP8LHistogram * const b,VP8LHistogram * const out)1530 static void HistogramAdd(const VP8LHistogram* const a,
1531 const VP8LHistogram* const b,
1532 VP8LHistogram* const out) {
1533 int i;
1534 const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
1535 assert(a->palette_code_bits_ == b->palette_code_bits_);
1536 if (b != out) {
1537 for (i = 0; i < literal_size; ++i) {
1538 out->literal_[i] = a->literal_[i] + b->literal_[i];
1539 }
1540 for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
1541 out->distance_[i] = a->distance_[i] + b->distance_[i];
1542 }
1543 for (i = 0; i < NUM_LITERAL_CODES; ++i) {
1544 out->red_[i] = a->red_[i] + b->red_[i];
1545 out->blue_[i] = a->blue_[i] + b->blue_[i];
1546 out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
1547 }
1548 } else {
1549 for (i = 0; i < literal_size; ++i) {
1550 out->literal_[i] += a->literal_[i];
1551 }
1552 for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
1553 out->distance_[i] += a->distance_[i];
1554 }
1555 for (i = 0; i < NUM_LITERAL_CODES; ++i) {
1556 out->red_[i] += a->red_[i];
1557 out->blue_[i] += a->blue_[i];
1558 out->alpha_[i] += a->alpha_[i];
1559 }
1560 }
1561 }
1562
1563 //------------------------------------------------------------------------------
1564
1565 VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
1566 VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
1567 VP8LPredictorFunc VP8LPredictors[16];
1568
1569 VP8LTransformColorFunc VP8LTransformColor;
1570 VP8LTransformColorFunc VP8LTransformColorInverse;
1571
1572 VP8LConvertFunc VP8LConvertBGRAToRGB;
1573 VP8LConvertFunc VP8LConvertBGRAToRGBA;
1574 VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
1575 VP8LConvertFunc VP8LConvertBGRAToRGB565;
1576 VP8LConvertFunc VP8LConvertBGRAToBGR;
1577
1578 VP8LFastLog2SlowFunc VP8LFastLog2Slow;
1579 VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
1580
1581 VP8LCostFunc VP8LExtraCost;
1582 VP8LCostCombinedFunc VP8LExtraCostCombined;
1583
1584 VP8LCostCountFunc VP8LHuffmanCostCount;
1585 VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
1586
1587 VP8LHistogramAddFunc VP8LHistogramAdd;
1588
1589 extern void VP8LDspInitSSE2(void);
1590 extern void VP8LDspInitNEON(void);
1591 extern void VP8LDspInitMIPS32(void);
1592
1593 static volatile VP8CPUInfo lossless_last_cpuinfo_used =
1594 (VP8CPUInfo)&lossless_last_cpuinfo_used;
1595
VP8LDspInit(void)1596 void VP8LDspInit(void) {
1597 if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
1598
1599 memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));
1600
1601 VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
1602 VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
1603
1604 VP8LTransformColor = VP8LTransformColor_C;
1605 VP8LTransformColorInverse = VP8LTransformColorInverse_C;
1606
1607 VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
1608 VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
1609 VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
1610 VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
1611 VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
1612
1613 VP8LFastLog2Slow = FastLog2Slow;
1614 VP8LFastSLog2Slow = FastSLog2Slow;
1615
1616 VP8LExtraCost = ExtraCost;
1617 VP8LExtraCostCombined = ExtraCostCombined;
1618
1619 VP8LHuffmanCostCount = HuffmanCostCount;
1620 VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
1621
1622 VP8LHistogramAdd = HistogramAdd;
1623
1624 // If defined, use CPUInfo() to overwrite some pointers with faster versions.
1625 if (VP8GetCPUInfo != NULL) {
1626 #if defined(WEBP_USE_SSE2)
1627 if (VP8GetCPUInfo(kSSE2)) {
1628 VP8LDspInitSSE2();
1629 }
1630 #endif
1631 #if defined(WEBP_USE_NEON)
1632 if (VP8GetCPUInfo(kNEON)) {
1633 VP8LDspInitNEON();
1634 }
1635 #endif
1636 #if defined(WEBP_USE_MIPS32)
1637 if (VP8GetCPUInfo(kMIPS32)) {
1638 VP8LDspInitMIPS32();
1639 }
1640 #endif
1641 }
1642 lossless_last_cpuinfo_used = VP8GetCPUInfo;
1643 }
1644
1645 //------------------------------------------------------------------------------
1646