1 
2 /********************************************************************
3  *                                                                  *
4  * THIS FILE IS PART OF THE 'ZYWRLE' VNC CODEC SOURCE CODE.         *
5  *                                                                  *
6  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
7  * GOVERNED BY A FOLLOWING BSD-STYLE SOURCE LICENSE.                *
8  * PLEASE READ THESE TERMS BEFORE DISTRIBUTING.                     *
9  *                                                                  *
10  * THE 'ZYWRLE' VNC CODEC SOURCE CODE IS (C) COPYRIGHT 2006         *
11  * BY Hitachi Systems & Services, Ltd.                              *
12  * (Noriaki Yamazaki, Research & Developement Center)               *                                                                 *
13  *                                                                  *
14  ********************************************************************
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions
17 are met:
18 
19 - Redistributions of source code must retain the above copyright
20 notice, this list of conditions and the following disclaimer.
21 
22 - Redistributions in binary form must reproduce the above copyright
23 notice, this list of conditions and the following disclaimer in the
24 documentation and/or other materials provided with the distribution.
25 
26 - Neither the name of the Hitachi Systems & Services, Ltd. nor
27 the names of its contributors may be used to endorse or promote
28 products derived from this software without specific prior written
29 permission.
30 
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
34 A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION
35 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
37 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
41 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42  ********************************************************************/
43 
44 /* Change Log:
45      V0.02 : 2008/02/04 : Fix mis encode/decode when width != scanline
46 	                     (Thanks Johannes Schindelin, author of LibVNC
47 						  Server/Client)
48      V0.01 : 2007/02/06 : Initial release
49 */
50 
51 /* #define ZYWRLE_ENCODE */
52 /* #define ZYWRLE_DECODE */
53 #define ZYWRLE_QUANTIZE
54 
55 /*
56 [References]
57  PLHarr:
58    Senecal, J. G., P. Lindstrom, M. A. Duchaineau, and K. I. Joy, "An Improved N-Bit to N-Bit Reversible Haar-Like Transform," Pacific Graphics 2004, October 2004, pp. 371-380.
59  EZW:
60    Shapiro, JM: Embedded Image Coding Using Zerotrees of Wavelet Coefficients, IEEE Trans. Signal. Process., Vol.41, pp.3445-3462 (1993).
61 */
62 
63 
64 /* Template Macro stuffs. */
/* Per-instantiation names.
   Each macro below pastes BPP (and, for function names, END_FIX) onto a base
   identifier, so that every inclusion of this template yields its own set of
   functions and picks the matching per-depth mask / pixel macros.
   NOTE(review): BPP, END_FIX, __RFB_CONCAT2E and __RFB_CONCAT3E are not
   defined in this file; they are presumably supplied by the including
   translation unit - confirm. */
#undef ZYWRLE_ANALYZE
#undef ZYWRLE_SYNTHESIZE
#define ZYWRLE_ANALYZE __RFB_CONCAT3E(zywrleAnalyze,BPP,END_FIX)
#define ZYWRLE_SYNTHESIZE __RFB_CONCAT3E(zywrleSynthesize,BPP,END_FIX)

#define ZYWRLE_RGBYUV __RFB_CONCAT3E(zywrleRGBYUV,BPP,END_FIX)
#define ZYWRLE_YUVRGB __RFB_CONCAT3E(zywrleYUVRGB,BPP,END_FIX)
#define ZYWRLE_YMASK __RFB_CONCAT2E(ZYWRLE_YMASK,BPP)
#define ZYWRLE_UVMASK __RFB_CONCAT2E(ZYWRLE_UVMASK,BPP)
#define ZYWRLE_LOAD_PIXEL __RFB_CONCAT2E(ZYWRLE_LOAD_PIXEL,BPP)
#define ZYWRLE_SAVE_PIXEL __RFB_CONCAT2E(ZYWRLE_SAVE_PIXEL,BPP)
76 
77 /* Packing/Unpacking pixel stuffs.
78    Endian conversion stuffs. */
/* Byte indices into a packed pixel, chosen by ZYWRLE_ENDIAN.
   S_0/S_1 address the two bytes of a 16-bit pixel and L_0/L_1/L_2 the three
   colour bytes of a 32-bit pixel; index suffix 0 is the least significant
   byte in both groups.
   NOTE(review): if neither ZYWRLE_ENDIAN nor ENDIAN_BIG is defined, the
   preprocessor evaluates both as 0 and the big-endian branch is selected. */
#undef S_0
#undef S_1
#undef L_0
#undef L_1
#undef L_2
#if ZYWRLE_ENDIAN == ENDIAN_BIG
#  define S_0	1
#  define S_1	0
#  define L_0	3
#  define L_1	2
#  define L_2	1
#else
#  define S_0	0
#  define S_1	1
#  define L_0	0
#  define L_1	1
#  define L_2	2
#endif
97 
98 /*   Load/Save pixel stuffs. */
/* 15 bpp: masks selected through ZYWRLE_YMASK / ZYWRLE_UVMASK when BPP is 15
   (both clear the low 3 bits). */
#define ZYWRLE_YMASK15  0xFFFFFFF8
#define ZYWRLE_UVMASK15 0xFFFFFFF8
/* Unpack a 5-5-5 pixel at pSrc: R comes from bits 14..10, G from bits 9..5
   and B from bits 4..0, each left-aligned into the top 5 bits of a byte. */
#define ZYWRLE_LOAD_PIXEL15(pSrc,R,G,B) { \
	R =  (((unsigned char*)pSrc)[S_1]<< 1)& 0xF8;	\
	G = ((((unsigned char*)pSrc)[S_1]<< 6)|(((unsigned char*)pSrc)[S_0]>> 2))& 0xF8;	\
	B =  (((unsigned char*)pSrc)[S_0]<< 3)& 0xF8;	\
}
/* Pack R,G,B back into a 5-5-5 pixel at pDst.
   Side effect: R, G and B themselves are masked to their top 5 bits. */
#define ZYWRLE_SAVE_PIXEL15(pDst,R,G,B) { \
	R &= 0xF8;	\
	G &= 0xF8;	\
	B &= 0xF8;	\
	((unsigned char*)pDst)[S_1] = (unsigned char)( (R>>1)|(G>>6)       );	\
	((unsigned char*)pDst)[S_0] = (unsigned char)(((B>>3)|(G<<2))& 0xFF);	\
}
/* 16 bpp: the Y mask keeps 6 bits (low 2 cleared), the U/V mask keeps 5 bits
   (low 3 cleared). */
#define ZYWRLE_YMASK16  0xFFFFFFFC
#define ZYWRLE_UVMASK16 0xFFFFFFF8
/* Unpack a 5-6-5 pixel at pSrc: R from bits 15..11, G from bits 10..5 and
   B from bits 4..0, each left-aligned within a byte. */
#define ZYWRLE_LOAD_PIXEL16(pSrc,R,G,B) { \
	R =   ((unsigned char*)pSrc)[S_1]     & 0xF8;	\
	G = ((((unsigned char*)pSrc)[S_1]<< 5)|(((unsigned char*)pSrc)[S_0]>> 3))& 0xFC;	\
	B =  (((unsigned char*)pSrc)[S_0]<< 3)& 0xF8;	\
}
/* Pack R,G,B back into a 5-6-5 pixel at pDst.
   Side effect: R and B are masked to 5 bits, G to 6 bits. */
#define ZYWRLE_SAVE_PIXEL16(pDst,R,G,B) { \
	R &= 0xF8;	\
	G &= 0xFC;	\
	B &= 0xF8;	\
	((unsigned char*)pDst)[S_1] = (unsigned char)(  R    |(G>>5)       );	\
	((unsigned char*)pDst)[S_0] = (unsigned char)(((B>>3)|(G<<3))& 0xFF);	\
}
/* 32 bpp: full 8-bit components, so the masks keep every bit. */
#define ZYWRLE_YMASK32  0xFFFFFFFF
#define ZYWRLE_UVMASK32 0xFFFFFFFF
/* Read the three colour bytes of a 32-bit pixel (B is the least significant
   byte, then G, then R); the fourth byte is ignored. */
#define ZYWRLE_LOAD_PIXEL32(pSrc,R,G,B) { \
	R = ((unsigned char*)pSrc)[L_2];	\
	G = ((unsigned char*)pSrc)[L_1];	\
	B = ((unsigned char*)pSrc)[L_0];	\
}
/* Write the three colour bytes of a 32-bit pixel; the fourth byte of the
   pixel is left untouched. */
#define ZYWRLE_SAVE_PIXEL32(pDst,R,G,B) { \
	((unsigned char*)pDst)[L_2] = (unsigned char)R;	\
	((unsigned char*)pDst)[L_1] = (unsigned char)G;	\
	((unsigned char*)pDst)[L_0] = (unsigned char)B;	\
}
139 
140 #ifndef ZYWRLE_ONCE
141 #define ZYWRLE_ONCE
142 
/* InlineX: inline keyword used for the helper functions below.
   Expands to __inline when WIN32 is defined, to inline otherwise, and to
   nothing when the compiler signals strict ANSI mode (__STRICT_ANSI__). */
#ifdef WIN32
#define InlineX __inline
#else
# ifndef __STRICT_ANSI__
#  define InlineX inline
# else
#  define InlineX
# endif
#endif
152 
153 #ifdef ZYWRLE_ENCODE
154 /* Tables for Coefficients filtering. */
155 #  ifndef ZYWRLE_QUANTIZE
156 /* Type A:lower bit omitting of EZW style. */
/* Bit masks for the type A filter, indexed as zywrleParam[level-1][l].
   Each entry is ANDed with a packed coefficient word by
   FilterWaveletSquare(); a zero entry wipes the coefficient completely.
   The storage-class specifier is written first ("static const"): placing it
   after the qualifier is an obsolescent form in ISO C. */
static const unsigned int zywrleParam[3][3]={
	{0x0000F000,0x00000000,0x00000000},
	{0x0000C000,0x00F0F0F0,0x00000000},
	{0x0000C000,0x00C0C0C0,0x00F0F0F0},
/*	Alternative (disabled) mask set:
	{0x0000FF00,0x00000000,0x00000000},
	{0x0000FF00,0x00FFFFFF,0x00000000},
	{0x0000FF00,0x00FFFFFF,0x00FFFFFF}, */
};
165 #  else
/* Type B: non-linear quantization filter. */
167 static const signed char zywrleConv[4][256]={
168 {	/* bi=5, bo=5 r=0.0:PSNR=24.849 */
169 	0, 0, 0, 0, 0, 0, 0, 0,
170 	0, 0, 0, 0, 0, 0, 0, 0,
171 	0, 0, 0, 0, 0, 0, 0, 0,
172 	0, 0, 0, 0, 0, 0, 0, 0,
173 	0, 0, 0, 0, 0, 0, 0, 0,
174 	0, 0, 0, 0, 0, 0, 0, 0,
175 	0, 0, 0, 0, 0, 0, 0, 0,
176 	0, 0, 0, 0, 0, 0, 0, 0,
177 	0, 0, 0, 0, 0, 0, 0, 0,
178 	0, 0, 0, 0, 0, 0, 0, 0,
179 	0, 0, 0, 0, 0, 0, 0, 0,
180 	0, 0, 0, 0, 0, 0, 0, 0,
181 	0, 0, 0, 0, 0, 0, 0, 0,
182 	0, 0, 0, 0, 0, 0, 0, 0,
183 	0, 0, 0, 0, 0, 0, 0, 0,
184 	0, 0, 0, 0, 0, 0, 0, 0,
185 	0, 0, 0, 0, 0, 0, 0, 0,
186 	0, 0, 0, 0, 0, 0, 0, 0,
187 	0, 0, 0, 0, 0, 0, 0, 0,
188 	0, 0, 0, 0, 0, 0, 0, 0,
189 	0, 0, 0, 0, 0, 0, 0, 0,
190 	0, 0, 0, 0, 0, 0, 0, 0,
191 	0, 0, 0, 0, 0, 0, 0, 0,
192 	0, 0, 0, 0, 0, 0, 0, 0,
193 	0, 0, 0, 0, 0, 0, 0, 0,
194 	0, 0, 0, 0, 0, 0, 0, 0,
195 	0, 0, 0, 0, 0, 0, 0, 0,
196 	0, 0, 0, 0, 0, 0, 0, 0,
197 	0, 0, 0, 0, 0, 0, 0, 0,
198 	0, 0, 0, 0, 0, 0, 0, 0,
199 	0, 0, 0, 0, 0, 0, 0, 0,
200 	0, 0, 0, 0, 0, 0, 0, 0,
201 },
202 {	/* bi=5, bo=5 r=2.0:PSNR=74.031 */
203 	0, 0, 0, 0, 0, 0, 0, 0,
204 	0, 0, 0, 0, 0, 0, 0, 0,
205 	0, 0, 0, 0, 0, 0, 0, 32,
206 	32, 32, 32, 32, 32, 32, 32, 32,
207 	32, 32, 32, 32, 32, 32, 32, 32,
208 	48, 48, 48, 48, 48, 48, 48, 48,
209 	48, 48, 48, 56, 56, 56, 56, 56,
210 	56, 56, 56, 56, 64, 64, 64, 64,
211 	64, 64, 64, 64, 72, 72, 72, 72,
212 	72, 72, 72, 72, 80, 80, 80, 80,
213 	80, 80, 88, 88, 88, 88, 88, 88,
214 	88, 88, 88, 88, 88, 88, 96, 96,
215 	96, 96, 96, 104, 104, 104, 104, 104,
216 	104, 104, 104, 104, 104, 112, 112, 112,
217 	112, 112, 112, 112, 112, 112, 120, 120,
218 	120, 120, 120, 120, 120, 120, 120, 120,
219 	0, -120, -120, -120, -120, -120, -120, -120,
220 	-120, -120, -120, -112, -112, -112, -112, -112,
221 	-112, -112, -112, -112, -104, -104, -104, -104,
222 	-104, -104, -104, -104, -104, -104, -96, -96,
223 	-96, -96, -96, -88, -88, -88, -88, -88,
224 	-88, -88, -88, -88, -88, -88, -88, -80,
225 	-80, -80, -80, -80, -80, -72, -72, -72,
226 	-72, -72, -72, -72, -72, -64, -64, -64,
227 	-64, -64, -64, -64, -64, -56, -56, -56,
228 	-56, -56, -56, -56, -56, -56, -48, -48,
229 	-48, -48, -48, -48, -48, -48, -48, -48,
230 	-48, -32, -32, -32, -32, -32, -32, -32,
231 	-32, -32, -32, -32, -32, -32, -32, -32,
232 	-32, -32, 0, 0, 0, 0, 0, 0,
233 	0, 0, 0, 0, 0, 0, 0, 0,
234 	0, 0, 0, 0, 0, 0, 0, 0,
235 },
236 {	/* bi=5, bo=4 r=2.0:PSNR=64.441 */
237 	0, 0, 0, 0, 0, 0, 0, 0,
238 	0, 0, 0, 0, 0, 0, 0, 0,
239 	0, 0, 0, 0, 0, 0, 0, 0,
240 	0, 0, 0, 0, 0, 0, 0, 0,
241 	48, 48, 48, 48, 48, 48, 48, 48,
242 	48, 48, 48, 48, 48, 48, 48, 48,
243 	48, 48, 48, 48, 48, 48, 48, 48,
244 	64, 64, 64, 64, 64, 64, 64, 64,
245 	64, 64, 64, 64, 64, 64, 64, 64,
246 	80, 80, 80, 80, 80, 80, 80, 80,
247 	80, 80, 80, 80, 80, 88, 88, 88,
248 	88, 88, 88, 88, 88, 88, 88, 88,
249 	104, 104, 104, 104, 104, 104, 104, 104,
250 	104, 104, 104, 112, 112, 112, 112, 112,
251 	112, 112, 112, 112, 120, 120, 120, 120,
252 	120, 120, 120, 120, 120, 120, 120, 120,
253 	0, -120, -120, -120, -120, -120, -120, -120,
254 	-120, -120, -120, -120, -120, -112, -112, -112,
255 	-112, -112, -112, -112, -112, -112, -104, -104,
256 	-104, -104, -104, -104, -104, -104, -104, -104,
257 	-104, -88, -88, -88, -88, -88, -88, -88,
258 	-88, -88, -88, -88, -80, -80, -80, -80,
259 	-80, -80, -80, -80, -80, -80, -80, -80,
260 	-80, -64, -64, -64, -64, -64, -64, -64,
261 	-64, -64, -64, -64, -64, -64, -64, -64,
262 	-64, -48, -48, -48, -48, -48, -48, -48,
263 	-48, -48, -48, -48, -48, -48, -48, -48,
264 	-48, -48, -48, -48, -48, -48, -48, -48,
265 	-48, 0, 0, 0, 0, 0, 0, 0,
266 	0, 0, 0, 0, 0, 0, 0, 0,
267 	0, 0, 0, 0, 0, 0, 0, 0,
268 	0, 0, 0, 0, 0, 0, 0, 0,
269 },
270 {	/* bi=5, bo=2 r=2.0:PSNR=43.175 */
271 	0, 0, 0, 0, 0, 0, 0, 0,
272 	0, 0, 0, 0, 0, 0, 0, 0,
273 	0, 0, 0, 0, 0, 0, 0, 0,
274 	0, 0, 0, 0, 0, 0, 0, 0,
275 	0, 0, 0, 0, 0, 0, 0, 0,
276 	0, 0, 0, 0, 0, 0, 0, 0,
277 	0, 0, 0, 0, 0, 0, 0, 0,
278 	0, 0, 0, 0, 0, 0, 0, 0,
279 	88, 88, 88, 88, 88, 88, 88, 88,
280 	88, 88, 88, 88, 88, 88, 88, 88,
281 	88, 88, 88, 88, 88, 88, 88, 88,
282 	88, 88, 88, 88, 88, 88, 88, 88,
283 	88, 88, 88, 88, 88, 88, 88, 88,
284 	88, 88, 88, 88, 88, 88, 88, 88,
285 	88, 88, 88, 88, 88, 88, 88, 88,
286 	88, 88, 88, 88, 88, 88, 88, 88,
287 	0, -88, -88, -88, -88, -88, -88, -88,
288 	-88, -88, -88, -88, -88, -88, -88, -88,
289 	-88, -88, -88, -88, -88, -88, -88, -88,
290 	-88, -88, -88, -88, -88, -88, -88, -88,
291 	-88, -88, -88, -88, -88, -88, -88, -88,
292 	-88, -88, -88, -88, -88, -88, -88, -88,
293 	-88, -88, -88, -88, -88, -88, -88, -88,
294 	-88, -88, -88, -88, -88, -88, -88, -88,
295 	-88, 0, 0, 0, 0, 0, 0, 0,
296 	0, 0, 0, 0, 0, 0, 0, 0,
297 	0, 0, 0, 0, 0, 0, 0, 0,
298 	0, 0, 0, 0, 0, 0, 0, 0,
299 	0, 0, 0, 0, 0, 0, 0, 0,
300 	0, 0, 0, 0, 0, 0, 0, 0,
301 	0, 0, 0, 0, 0, 0, 0, 0,
302 	0, 0, 0, 0, 0, 0, 0, 0,
303 }
304 };
305 const static signed char* zywrleParam[3][3][3]={
306 	{{zywrleConv[0],zywrleConv[2],zywrleConv[0]},{zywrleConv[0],zywrleConv[0],zywrleConv[0]},{zywrleConv[0],zywrleConv[0],zywrleConv[0]}},
307 	{{zywrleConv[0],zywrleConv[3],zywrleConv[0]},{zywrleConv[1],zywrleConv[1],zywrleConv[1]},{zywrleConv[0],zywrleConv[0],zywrleConv[0]}},
308 	{{zywrleConv[0],zywrleConv[3],zywrleConv[0]},{zywrleConv[2],zywrleConv[2],zywrleConv[2]},{zywrleConv[1],zywrleConv[1],zywrleConv[1]}},
309 };
310 #  endif
311 #endif
312 
/*
 Piecewise-Linear Haar (PLHarr) butterfly on one pair of 8-bit values.
 Reads *pX0 and *pX1, then stores the "low" result in *pX0 and the "high"
 result in *pX1.  Bit 7 of each value is used as its sign.
*/
static InlineX void Harr(signed char* pX0, signed char* pX1)
{
	const int a = (int)*pX0;
	const int b = (int)*pX1;
	int high = a;
	int low = b;

	if (((a ^ b) & 0x80) != 0) {
		/* Inputs have opposite signs. */
		low = b + a;
		if (((low ^ b) & 0x80) == 0) {
			/* The sum kept the sign of b, i.e. |b| > |a|: H = -B */
			high = a - low;
		}
	} else {
		/* Inputs have the same sign. */
		high = a - b;
		if (((high ^ a) & 0x80) == 0) {
			/* The difference kept the sign of a, i.e. |a| > |b|: L = A */
			low = b + high;
		}
	}

	*pX0 = (signed char)low;
	*pX1 = (signed char)high;
}
336 /*
337  1D-Wavelet transform.
338 
339  In coefficients array, the famous 'pyramid' decomposition is well used.
340 
341  1D Model:
342    |L0L0L0L0|L0L0L0L0|H0H0H0H0|H0H0H0H0| : level 0
343    |L1L1L1L1|H1H1H1H1|H0H0H0H0|H0H0H0H0| : level 1
344 
 But this method needs a line buffer because H/L end up at different positions from X0/X1.
 So I use an 'interleaved' decomposition instead.
347 
348  1D Model:
349    |L0H0L0H0|L0H0L0H0|L0H0L0H0|L0H0L0H0| : level 0
350    |L1H0H1H0|L1H0H1H0|L1H0H1H0|L1H0H1H0| : level 1
351 
 In this method, H/L and X0/X1 are always at the same position.
 This gives us more speed and lower memory use.
 Of course, the results of both methods are the same;
 the only difference is the position of the coefficients.
356 */
WaveletLevel(int * data,int size,int l,int SkipPixel)357 static InlineX void WaveletLevel(int* data, int size, int l, int SkipPixel)
358 {
359 	int s, ofs;
360 	signed char* pX0;
361 	signed char* end;
362 
363 	pX0 = (signed char*)data;
364 	s = (8<<l)*SkipPixel;
365 	end = pX0+(size>>(l+1))*s;
366 	s -= 2;
367 	ofs = (4<<l)*SkipPixel;
368 	while (pX0 < end) {
369 		Harr(pX0, pX0+ofs);
370 		pX0++;
371 		Harr(pX0, pX0+ofs);
372 		pX0++;
373 		Harr(pX0, pX0+ofs);
374 		pX0 += s;
375 	}
376 }
377 #define InvWaveletLevel(d,s,l,pix) WaveletLevel(d,s,l,pix)
378 
379 #ifdef ZYWRLE_ENCODE
380 #  ifndef ZYWRLE_QUANTIZE
381 /* Type A:lower bit omitting of EZW style. */
FilterWaveletSquare(int * pBuf,int width,int height,int level,int l)382 static InlineX void FilterWaveletSquare(int* pBuf, int width, int height, int level, int l)
383 {
384 	int r, s;
385 	int x, y;
386 	int* pH;
387 	const unsigned int* pM;
388 
389 	pM = &(zywrleParam[level-1][l]);
390 	s = 2<<l;
391 	for (r = 1; r < 4; r++) {
392 		pH   = pBuf;
393 		if (r & 0x01)
394 			pH +=  s>>1;
395 		if (r & 0x02)
396 			pH += (s>>1)*width;
397 		for (y = 0; y < height / s; y++) {
398 			for (x = 0; x < width / s; x++) {
399 				/*
400 				 these are same following code.
401 				     pH[x] = pH[x] / (~pM[x]+1) * (~pM[x]+1);
402 				     ( round pH[x] with pM[x] bit )
403 				 '&' operator isn't 'round' but is 'floor'.
404 				 So, we must offset when pH[x] is negative.
405 				*/
406 				if (((signed char*)pH)[0] & 0x80)
407 					((signed char*)pH)[0] += ~((signed char*)pM)[0];
408 				if (((signed char*)pH)[1] & 0x80)
409 					((signed char*)pH)[1] += ~((signed char*)pM)[1];
410 				if (((signed char*)pH)[2] & 0x80)
411 					((signed char*)pH)[2] += ~((signed char*)pM)[2];
412 				*pH &= *pM;
413 				pH += s;
414 			}
415 			pH += (s-1)*width;
416 		}
417 	}
418 }
419 #  else
420 /*
 Type B: non-linear quantization filter.

 Coefficients follow a Gaussian-like distribution, and the small values,
 which make up most of the coefficients, are less important than the large ones.
 So I filter them through a non-linear quantize/dequantize table.
 In general, non-linear quantization is described by the following formulas.
427 
428     y=f(x)   = sign(x)*round( ((abs(x)/(2^7))^ r   )* 2^(bo-1) )*2^(8-bo)
429     x=f-1(y) = sign(y)*round( ((abs(y)/(2^7))^(1/r))* 2^(bi-1) )*2^(8-bi)
430  ( r:power coefficient  bi:effective MSB in input  bo:effective MSB in output )
431 
   r < 1.0 : Smaller values are more important than larger values.
   r > 1.0 : Larger values are more important than smaller values.
   r = 1.0 : Linear quantization, which is the same as the EZW style.

 r = 0.75 is the well-known non-linear quantization used in the MP3 audio codec.
 In contrast to audio data, larger values are the important ones in wavelet coefficients.
 So I selected the r = 2.0 table (quantize is x^2, dequantize is sqrt(x)).
439 
 Compared with EZW-style linear quantization, this filter tends to give
 sharper edges and a better compression rate, but more blocking noise and lower quality.
 In particular, the surfaces of graphic objects show noticeable noise in middle quality mode.

 We only need the quantized-then-dequantized (filtered) value, not the quantized value itself,
 because all values are packed or palettized in the later ZRLE stage.
 This means the client decoder does not need to be modified when we change
 the filtering procedure in the future.
 The client only decodes the coefficients given by the encoder.
449 */
FilterWaveletSquare(int * pBuf,int width,int height,int level,int l)450 static InlineX void FilterWaveletSquare(int* pBuf, int width, int height, int level, int l)
451 {
452 	int r, s;
453 	int x, y;
454 	int* pH;
455 	const signed char** pM;
456 
457 	pM = zywrleParam[level-1][l];
458 	s = 2<<l;
459 	for (r = 1; r < 4; r++) {
460 		pH   = pBuf;
461 		if (r & 0x01)
462 			pH +=  s>>1;
463 		if (r & 0x02)
464 			pH += (s>>1)*width;
465 		for (y = 0; y < height / s; y++) {
466 			for (x = 0; x < width / s; x++) {
467 				((signed char*)pH)[0] = pM[0][((unsigned char*)pH)[0]];
468 				((signed char*)pH)[1] = pM[1][((unsigned char*)pH)[1]];
469 				((signed char*)pH)[2] = pM[2][((unsigned char*)pH)[2]];
470 				pH += s;
471 			}
472 			pH += (s-1)*width;
473 		}
474 	}
475 }
476 #  endif
477 
Wavelet(int * pBuf,int width,int height,int level)478 static InlineX void Wavelet(int* pBuf, int width, int height, int level)
479 {
480 	int l, s;
481 	int* pTop;
482 	int* pEnd;
483 
484 	for (l = 0; l < level; l++) {
485 		pTop = pBuf;
486 		pEnd = pBuf+height*width;
487 		s = width<<l;
488 		while (pTop < pEnd) {
489 			WaveletLevel(pTop, width, l, 1);
490 			pTop += s;
491 		}
492 		pTop = pBuf;
493 		pEnd = pBuf+width;
494 		s = 1<<l;
495 		while (pTop < pEnd) {
496 			WaveletLevel(pTop, height,l, width);
497 			pTop += s;
498 		}
499 		FilterWaveletSquare(pBuf, width, height, level, l);
500 	}
501 }
502 #endif
503 #ifdef ZYWRLE_DECODE
InvWavelet(int * pBuf,int width,int height,int level)504 static InlineX void InvWavelet(int* pBuf, int width, int height, int level)
505 {
506 	int l, s;
507 	int* pTop;
508 	int* pEnd;
509 
510 	for (l = level - 1; l >= 0; l--) {
511 		pTop = pBuf;
512 		pEnd = pBuf+width;
513 		s = 1<<l;
514 		while (pTop < pEnd) {
515 			InvWaveletLevel(pTop, height,l, width);
516 			pTop += s;
517 		}
518 		pTop = pBuf;
519 		pEnd = pBuf+height*width;
520 		s = width<<l;
521 		while (pTop < pEnd) {
522 			InvWaveletLevel(pTop, width, l, 1);
523 			pTop += s;
524 		}
525 	}
526 }
527 #endif
528 
/* Load/save of packed coefficients.
 A coefficient word is handled as a 24-bit little-endian pixel: three signed
 bytes at offsets 0, 1 and 2 of the word, independent of host byte order.
 The R/G/B parameter names are only positional: R maps to byte 2, G to
 byte 1 and B to byte 0. */
/* Read the three signed bytes of the coefficient at pSrc into R, G, B. */
#define ZYWRLE_LOAD_COEFF(pSrc,R,G,B) { \
	R = ((signed char*)pSrc)[2];	\
	G = ((signed char*)pSrc)[1];	\
	B = ((signed char*)pSrc)[0];	\
}
/* Store R, G, B (truncated to signed char) into the coefficient at pDst;
   byte 3 of the word is not written. */
#define ZYWRLE_SAVE_COEFF(pDst,R,G,B) { \
	((signed char*)pDst)[2] = (signed char)R;	\
	((signed char*)pDst)[1] = (signed char)G;	\
	((signed char*)pDst)[0] = (signed char)B;	\
}
541 
542 /*
543  RGB <=> YUV conversion stuffs.
 Strictly speaking, YUV conversion is defined by the following formulas:
545    Y =  0.299R + 0.587G + 0.114B (   0<=Y<=255)
546    U = -0.169R - 0.331G + 0.500B (-128<=U<=127)
547    V =  0.500R - 0.419G - 0.081B (-128<=V<=127)
548 
549  I use simple conversion RCT(reversible color transform) which is described
550  in JPEG-2000 specification.
551    Y = (R + 2G + B)/4 (   0<=Y<=255)
552    U = B-G (-256<=U<=255)
553    V = R-G (-256<=V<=255)
554 */
/* Clamp x to the 8-bit range 0..255 (despite its name, no rounding is done).
   x is evaluated more than once. */
#define ROUND(x) (((x)<0)?0:(((x)>255)?255:(x)))
	/* RCT maps N-bit RGB to N-bit Y and (N+1)-bit U/V.
	 To keep everything at N bits, U and V are halved, which is lossy.
	 For a more exact PLHarr, values are limited to the symmetric range
	 -127<=x<=127. */
/* Forward colour transform of one pixel.
   R,G,B : input components (int lvalues, not modified)
   Y,U,V : int lvalues receiving Y-128, (B-G)>>1 and (R-G)>>1, each ANDed
           with its mask
   ymask, uvmask : bit masks for Y and for U/V
   A result of exactly -128 is raised by one mask step
   (0xFFFFFFFF-mask+1) so that it stays inside -127..127. */
#define ZYWRLE_RGBYUV1(R,G,B,Y,U,V,ymask,uvmask) { \
	Y = (R+(G<<1)+B)>>2;	\
	U =  B-G;	\
	V =  R-G;	\
	Y -= 128;	\
	U >>= 1;	\
	V >>= 1;	\
	Y &= (ymask);	\
	U &= (uvmask);	\
	V &= (uvmask);	\
	if (Y == -128)	\
		Y += (0xFFFFFFFF-(ymask)+1);	\
	if (U == -128)	\
		U += (0xFFFFFFFF-(uvmask)+1);	\
	if (V == -128)	\
		V += (0xFFFFFFFF-(uvmask)+1);	\
}
/* Inverse colour transform of one pixel.
   Y,U,V : int lvalues holding the coefficients; they are modified in place
   R,G,B : int lvalues receiving the result, clamped to 0..255
   U and V are doubled with "*= 2" rather than "<<= 1": both may be negative,
   and left-shifting a negative value is undefined behaviour in ISO C. */
#define ZYWRLE_YUVRGB1(R,G,B,Y,U,V) { \
	Y += 128;	\
	U *= 2;	\
	V *= 2;	\
	G = Y-((U+V)>>2);	\
	B = U+G;	\
	R = V+G;	\
	G = ROUND(G);	\
	B = ROUND(B);	\
	R = ROUND(R);	\
}
587 
588 /*
589  coefficient packing/unpacking stuffs.
590  Wavelet transform makes 4 sub coefficient image from 1 original image.
591 
592  model with pyramid decomposition:
593    +------+------+
594    |      |      |
595    |  L   |  Hx  |
596    |      |      |
597    +------+------+
598    |      |      |
599    |  H   |  Hxy |
600    |      |      |
601    +------+------+
602 
 So, strictly speaking, we should transfer each sub-image individually.
 But as far as ZRLE is concerned, the single decomposition image below is
 equivalent to the individual sub-images above. I use this format.
 (Strictly speaking, the transfer order is reversed (Hxy->Hy->Hx->L)
  to simplify the procedure for any wavelet level.)
608 
609    +------+------+
610    |      L      |
611    +------+------+
612    |      Hx     |
613    +------+------+
614    |      Hy     |
615    +------+------+
616    |      Hxy    |
617    +------+------+
618 */
/* Advance the pixel pointer `data` by one; once a full row of (w+uw) pixels
   has been passed, skip the rest of the scanline and make pData the start of
   the new row.  Uses pData, w, uw and scanline from the expanding scope.
   NOTE(review): expands to two unbraced statements, so it must not be used
   as the sole body of an unbraced if/else/loop. */
#define INC_PTR(data) \
	data++;	\
	if( data-pData >= (w+uw) ){	\
		data += scanline-(w+uw);	\
		pData = data;	\
	}

/* Walk sub-band r of wavelet pass `level` inside pBuf (w x h coefficients).
   s = 2<<level is the spacing of the sub-band samples; bit 0 of r offsets the
   start by s/2 columns and bit 1 by s/2 rows.  For every sample, TRANS is
   executed with pH on the coefficient and `data` on the pixel, then `data`
   is advanced with INC_PTR.  Uses pH, s, pEnd and pLine from the expanding
   scope. */
#define ZYWRLE_TRANSFER_COEFF(pBuf,data,r,w,h,scanline,level,TRANS)	\
	pH = pBuf;	\
	s = 2<<level;	\
	if (r & 0x01)	\
		pH +=  s>>1;	\
	if (r & 0x02)	\
		pH += (s>>1)*w;	\
	pEnd = pH+h*w;	\
	while (pH < pEnd) {	\
		pLine = pH+w;	\
		while (pH < pLine) {	\
			TRANS	\
			INC_PTR(data)	\
			pH += s;	\
		}	\
		pH += (s-1)*w;	\
	}

/* Copy one sub-band from the coefficient buffer into the pixel stream `data`
   (coefficient bytes are written with ZYWRLE_SAVE_PIXEL).  Uses R, G, B from
   the expanding scope. */
#define ZYWRLE_PACK_COEFF(pBuf,data,r,width,height,scanline,level)	\
	ZYWRLE_TRANSFER_COEFF(pBuf,data,r,width,height,scanline,level,ZYWRLE_LOAD_COEFF(pH,R,G,B);ZYWRLE_SAVE_PIXEL(data,R,G,B);)

/* Copy one sub-band from the pixel stream `data` back into the coefficient
   buffer (pixels are read with ZYWRLE_LOAD_PIXEL).  Uses R, G, B from the
   expanding scope. */
#define ZYWRLE_UNPACK_COEFF(pBuf,data,r,width,height,scanline,level)	\
	ZYWRLE_TRANSFER_COEFF(pBuf,data,r,width,height,scanline,level,ZYWRLE_LOAD_PIXEL(data,R,G,B);ZYWRLE_SAVE_COEFF(pH,R,G,B);)

/* Visit the buffer slots pBuf[w*h] .. pBuf[(w+uw)*(h+uh)-1], which hold the
   pixels outside the aligned w x h area.  TRANS is executed for each slot
   with pTop on the slot, then `data` is advanced with INC_PTR.  Uses pTop,
   pEnd, pBuf, w, h, uw and uh from the expanding scope. */
#define ZYWRLE_SAVE_UNALIGN(data,TRANS)	\
	pTop = pBuf+w*h;	\
	pEnd = pBuf + (w+uw)*(h+uh);	\
	while (pTop < pEnd) {	\
		TRANS	\
		INC_PTR(data)	\
		pTop++;	\
	}

/* Visit the pixels of image `data` that lie outside the aligned w x h area,
   in three passes: the right strip (uw columns of the first h rows), the
   bottom strip (first w columns of the last uh rows) and the bottom-right
   corner (uw x uh).  TRANS is executed for each pixel with pData on the
   pixel and pTop on the next buffer slot, starting at pBuf[w*h].
   pData is used as the cursor and is therefore overwritten; pEnd and pLine
   (declared int*) temporarily hold PIXEL_T addresses through casts. */
#define ZYWRLE_LOAD_UNALIGN(data,TRANS)	\
	pTop = pBuf+w*h;	\
	if (uw) {	\
		pData=         data + w;	\
		pEnd = (int*)(pData+ h*scanline);	\
		while (pData < (PIXEL_T*)pEnd) {	\
			pLine = (int*)(pData + uw);	\
			while (pData < (PIXEL_T*)pLine) {	\
				TRANS	\
				pData++;	\
				pTop++;	\
			}	\
			pData += scanline-uw;	\
		}	\
	}	\
	if (uh) {	\
		pData=         data +  h*scanline;	\
		pEnd = (int*)(pData+ uh*scanline);	\
		while (pData < (PIXEL_T*)pEnd) {	\
			pLine = (int*)(pData + w);	\
			while (pData < (PIXEL_T*)pLine) {	\
				TRANS	\
				pData++;	\
				pTop++;	\
			}	\
			pData += scanline-w;	\
		}	\
	}	\
	if (uw && uh) {	\
		pData=         data + w+ h*scanline;	\
		pEnd = (int*)(pData+   uh*scanline);	\
		while (pData < (PIXEL_T*)pEnd) {	\
			pLine = (int*)(pData + uw);	\
			while (pData < (PIXEL_T*)pLine) {	\
				TRANS	\
				pData++;	\
				pTop++;	\
			}	\
			pData += scanline-uw;	\
		}	\
	}
700 
/*
 Round *pW and *pH down, in place, to a multiple of 2^level by clearing
 their low `level` bits.
*/
static InlineX void zywrleCalcSize(int* pW, int* pH, int level)
{
	const int keepMask = ~((1<<level)-1);

	*pW &= keepMask;
	*pH &= keepMask;
}
706 
707 #endif /* ZYWRLE_ONCE */
708 
709 #ifndef CPIXEL
710 #ifdef ZYWRLE_ENCODE
ZYWRLE_RGBYUV(int * pBuf,PIXEL_T * data,int width,int height,int scanline)711 static InlineX void ZYWRLE_RGBYUV(int* pBuf, PIXEL_T* data, int width, int height, int scanline)
712 {
713 	int R, G, B;
714 	int Y, U, V;
715 	int* pLine;
716 	int* pEnd;
717 	pEnd = pBuf+height*width;
718 	while (pBuf < pEnd) {
719 		pLine = pBuf+width;
720 		while (pBuf < pLine) {
721 			ZYWRLE_LOAD_PIXEL(data,R,G,B);
722 			ZYWRLE_RGBYUV1(R,G,B,Y,U,V,ZYWRLE_YMASK,ZYWRLE_UVMASK);
723 			ZYWRLE_SAVE_COEFF(pBuf,V,Y,U);
724 			pBuf++;
725 			data++;
726 		}
727 		data += scanline-width;
728 	}
729 }
730 #endif
731 #ifdef ZYWRLE_DECODE
ZYWRLE_YUVRGB(int * pBuf,PIXEL_T * data,int width,int height,int scanline)732 static InlineX void ZYWRLE_YUVRGB(int* pBuf, PIXEL_T* data, int width, int height, int scanline) {
733 	int R, G, B;
734 	int Y, U, V;
735 	int* pLine;
736 	int* pEnd;
737 	pEnd = pBuf+height*width;
738 	while (pBuf < pEnd) {
739 		pLine = pBuf+width;
740 		while (pBuf < pLine) {
741 			ZYWRLE_LOAD_COEFF(pBuf,V,Y,U);
742 			ZYWRLE_YUVRGB1(R,G,B,Y,U,V);
743 			ZYWRLE_SAVE_PIXEL(data,R,G,B);
744 			pBuf++;
745 			data++;
746 		}
747 		data += scanline-width;
748 	}
749 }
750 #endif
751 
752 #ifdef ZYWRLE_ENCODE
/*
 Encoder entry point: wavelet-analyze one tile.

 dst      : output pixels (sub-band coefficients, then the unaligned pixels)
 src      : input tile, w x h pixels
 w, h     : tile size in pixels
 scanline : distance, in pixels, between the starts of two rows; used for
            both src and dst
 level    : number of wavelet passes; w and h are rounded down to a multiple
            of 2^level for the transform
 pBuf     : work buffer addressed up to (w+uw)*(h+uh) ints, where uw/uh are
            the columns/rows cut off by the rounding

 Returns NULL, without touching dst, when the aligned width or height is 0;
 otherwise returns dst as advanced past the last pixel written.

 Steps: stash the pixels outside the aligned area behind the coefficients in
 pBuf, convert the aligned area to YUV, run the wavelet, emit sub-bands 3, 2
 and 1 of every pass (plus sub-band 0 of the last pass), then append the
 stashed pixels unchanged.
*/
PIXEL_T* ZYWRLE_ANALYZE(PIXEL_T* dst, PIXEL_T* src, int w, int h, int scanline, int level, int* pBuf) {
	int l;
	int uw = w;
	int uh = h;
	int* pTop;
	int* pEnd;
	int* pLine;
	PIXEL_T* pData;
	int R, G, B;
	int s;
	int* pH;

	zywrleCalcSize(&w, &h, level);
	if (w == 0 || h == 0)
		return NULL;
	uw -= w;
	uh -= h;

	ZYWRLE_LOAD_UNALIGN(src,*(PIXEL_T*)pTop=*pData;)
	/* ZYWRLE_LOAD_UNALIGN uses pData as its own cursor into src, so the row
	   start that INC_PTR(dst) measures against must be set only afterwards;
	   otherwise the scanline skip is wrong whenever uw or uh is non-zero. */
	pData = dst;
	ZYWRLE_RGBYUV(pBuf, src, w, h, scanline);
	Wavelet(pBuf, w, h, level);
	for (l = 0; l < level; l++) {
		ZYWRLE_PACK_COEFF(pBuf, dst, 3, w, h, scanline, l);
		ZYWRLE_PACK_COEFF(pBuf, dst, 2, w, h, scanline, l);
		ZYWRLE_PACK_COEFF(pBuf, dst, 1, w, h, scanline, l);
		if (l == level - 1) {
			/* the low-frequency band is emitted only for the last pass */
			ZYWRLE_PACK_COEFF(pBuf, dst, 0, w, h, scanline, l);
		}
	}
	ZYWRLE_SAVE_UNALIGN(dst,*dst=*(PIXEL_T*)pTop;)
	return dst;
}
786 #endif
787 #ifdef ZYWRLE_DECODE
/*
 Decoder entry point: rebuild one tile from its wavelet representation.

 dst      : output tile, w x h pixels
 src      : input pixels (sub-band coefficients, then the unaligned pixels)
 w, h     : tile size in pixels
 scanline : distance, in pixels, between the starts of two rows; used for
            both src and dst
 level    : number of wavelet passes; w and h are rounded down to a multiple
            of 2^level for the transform
 pBuf     : work buffer addressed up to (w+uw)*(h+uh) ints, where uw/uh are
            the columns/rows cut off by the rounding

 Returns NULL, without touching dst, when the aligned width or height is 0;
 otherwise returns src as advanced past the last pixel read.
*/
PIXEL_T* ZYWRLE_SYNTHESIZE(PIXEL_T* dst, PIXEL_T* src, int w, int h, int scanline, int level, int* pBuf)
{
	int l;
	int uw = w;
	int uh = h;
	int* pTop;
	int* pEnd;
	int* pLine;
	PIXEL_T* pData;
	int R, G, B;
	int s;
	int* pH;

	zywrleCalcSize(&w, &h, level);
	if (w == 0 || h == 0)
		return NULL;
	uw -= w;
	uh -= h;

	/* row start against which INC_PTR(src) measures its progress */
	pData = src;
	for (l = 0; l < level; l++) {
		ZYWRLE_UNPACK_COEFF(pBuf, src, 3, w, h, scanline, l);
		ZYWRLE_UNPACK_COEFF(pBuf, src, 2, w, h, scanline, l);
		ZYWRLE_UNPACK_COEFF(pBuf, src, 1, w, h, scanline, l);
		if (l == level - 1) {
			/* the low-frequency band is present only for the last pass */
			ZYWRLE_UNPACK_COEFF(pBuf, src, 0, w, h, scanline, l);
		}
	}
	/* the remaining input pixels are the untransformed border; park them
	   behind the coefficients in pBuf */
	ZYWRLE_SAVE_UNALIGN(src,*(PIXEL_T*)pTop=*src;)
	InvWavelet(pBuf, w, h, level);
	ZYWRLE_YUVRGB(pBuf, dst, w, h, scanline);
	/* copy the parked border pixels to their places in dst */
	ZYWRLE_LOAD_UNALIGN(dst,*pData=*(PIXEL_T*)pTop;)
	return src;
}
822 #endif
823 #endif  /* CPIXEL */
824 
825 #undef ZYWRLE_RGBYUV
826 #undef ZYWRLE_YUVRGB
827 #undef ZYWRLE_LOAD_PIXEL
828 #undef ZYWRLE_SAVE_PIXEL
829