/*
 * The copyright in this software is being made available under the 2-clauses
 * BSD License, included below. This software may be subject to other third
 * party and contributor rights, including patent rights, and no such rights
 * are granted under this license.
 *
 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
 * Copyright (c) 2002-2014, Professor Benoit Macq
 * Copyright (c) 2001-2003, David Janssens
 * Copyright (c) 2002-2003, Yannick Verschueren
 * Copyright (c) 2003-2007, Francois-Olivier Devaux
 * Copyright (c) 2003-2014, Antonin Descampe
 * Copyright (c) 2005, Herve Drolon, FreeImage Team
 * Copyright (c) 2008, 2011-2012, Centre National d'Etudes Spatiales (CNES), FR
 * Copyright (c) 2012, CS Systemes d'Information, France
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#if defined(__SSE__) && !defined(_M_IX86) && !defined(__i386)
#define USE_SSE
#include <xmmintrin.h>
#endif
#if defined(__SSE2__) && !defined(_M_IX86) && !defined(__i386)
#define USE_SSE2
#include <emmintrin.h>
#endif
#if defined(__SSE4_1__) && !defined(_M_IX86) && !defined(__i386)
#define USE_SSE4
#include <smmintrin.h>
#endif

#include "opj_includes.h"

/* <summary> */
/* This table contains the norms of the basis functions of the reversible MCT. */
/* </summary> */
static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 };

/* <summary> */
/* This table contains the norms of the basis functions of the irreversible MCT. */
/* </summary> */
static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 };
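
/* Both tables are exposed through the accessors below; the norms are used by
 * the encoder to weight per-component distortion estimates during rate
 * allocation. */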

const OPJ_FLOAT64 * opj_mct_get_mct_norms()
{
    return opj_mct_norms;
}

const OPJ_FLOAT64 * opj_mct_get_mct_norms_real()
{
    return opj_mct_norms_real;
}

/* <summary> */
/* Forward reversible MCT. */
/* </summary> */
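/* The reversible transform (RCT) maps RGB to YUV with integer arithmetic:
 *   Y = (R + 2*G + B) >> 2,  U = B - G,  V = R - G
 * Both the SSE2 and scalar paths below compute exactly this. */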
#ifdef USE_SSE2
void opj_mct_encode(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;
    /* buffers are aligned on 16 bytes */
    assert(((size_t)c0 & 0xf) == 0);
    assert(((size_t)c1 & 0xf) == 0);
    assert(((size_t)c2 & 0xf) == 0);

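    /* Process 4 samples per iteration; the arithmetic right shift by 2
     * matches the scalar (r + 2*g + b) >> 2 computation of Y. Trailing
     * samples are handled by the scalar loop that follows. */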
    for (i = 0; i < (len & ~3U); i += 4) {
        __m128i y, u, v;
        __m128i r = _mm_load_si128((const __m128i *) & (c0[i]));
        __m128i g = _mm_load_si128((const __m128i *) & (c1[i]));
        __m128i b = _mm_load_si128((const __m128i *) & (c2[i]));
        y = _mm_add_epi32(g, g);
        y = _mm_add_epi32(y, b);
        y = _mm_add_epi32(y, r);
        y = _mm_srai_epi32(y, 2);
        u = _mm_sub_epi32(b, g);
        v = _mm_sub_epi32(r, g);
        _mm_store_si128((__m128i *) & (c0[i]), y);
        _mm_store_si128((__m128i *) & (c1[i]), u);
        _mm_store_si128((__m128i *) & (c2[i]), v);
    }

    for (; i < len; ++i) {
        OPJ_INT32 r = c0[i];
        OPJ_INT32 g = c1[i];
        OPJ_INT32 b = c2[i];
        OPJ_INT32 y = (r + (g * 2) + b) >> 2;
        OPJ_INT32 u = b - g;
        OPJ_INT32 v = r - g;
        c0[i] = y;
        c1[i] = u;
        c2[i] = v;
    }
}
#else
void opj_mct_encode(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;

    for (i = 0; i < len; ++i) {
        OPJ_INT32 r = c0[i];
        OPJ_INT32 g = c1[i];
        OPJ_INT32 b = c2[i];
        OPJ_INT32 y = (r + (g * 2) + b) >> 2;
        OPJ_INT32 u = b - g;
        OPJ_INT32 v = r - g;
        c0[i] = y;
        c1[i] = u;
        c2[i] = v;
    }
}
#endif

/* <summary> */
/* Inverse reversible MCT. */
/* </summary> */
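/* The inverse RCT recovers G first, then derives R and B from it:
 *   G = Y - ((U + V) >> 2),  R = V + G,  B = U + G */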
#ifdef USE_SSE2
void opj_mct_decode(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;

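    /* 4 samples per iteration; any remainder (len not a multiple of 4) is
     * handled by the scalar loop that follows. */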
    for (i = 0; i < (len & ~3U); i += 4) {
        __m128i r, g, b;
        __m128i y = _mm_load_si128((const __m128i *) & (c0[i]));
        __m128i u = _mm_load_si128((const __m128i *) & (c1[i]));
        __m128i v = _mm_load_si128((const __m128i *) & (c2[i]));
        g = y;
        g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2));
        r = _mm_add_epi32(v, g);
        b = _mm_add_epi32(u, g);
        _mm_store_si128((__m128i *) & (c0[i]), r);
        _mm_store_si128((__m128i *) & (c1[i]), g);
        _mm_store_si128((__m128i *) & (c2[i]), b);
    }
    for (; i < len; ++i) {
        OPJ_INT32 y = c0[i];
        OPJ_INT32 u = c1[i];
        OPJ_INT32 v = c2[i];
        OPJ_INT32 g = y - ((u + v) >> 2);
        OPJ_INT32 r = v + g;
        OPJ_INT32 b = u + g;
        c0[i] = r;
        c1[i] = g;
        c2[i] = b;
    }
}
#else
void opj_mct_decode(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    for (i = 0; i < n; ++i) {
        OPJ_INT32 y = c0[i];
        OPJ_INT32 u = c1[i];
        OPJ_INT32 v = c2[i];
        OPJ_INT32 g = y - ((u + v) >> 2);
        OPJ_INT32 r = v + g;
        OPJ_INT32 b = u + g;
        c0[i] = r;
        c1[i] = g;
        c2[i] = b;
    }
}
#endif

/* <summary> */
/* Get norm of basis function of reversible MCT. */
/* </summary> */
OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno)
{
    return opj_mct_norms[compno];
}

/* <summary> */
/* Forward irreversible MCT. */
/* </summary> */
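/* The irreversible transform (ICT) is the usual RGB -> YCbCr transform,
 * evaluated here in 13-bit fixed point via opj_int_fix_mul:
 *   Y =  0.299*R + 0.587*G + 0.114*B      (2449, 4809,  934)
 *   U = -0.169*R - 0.331*G + 0.500*B      (1382, 2714, 4096)
 *   V =  0.500*R - 0.419*G - 0.081*B      (4096, 3430,  666)
 * The SSE4.1 path is written to match the rounding of opj_int_fix_mul. */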
#ifdef USE_SSE4
void opj_mct_encode_real(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;

    const __m128i ry = _mm_set1_epi32(2449);
    const __m128i gy = _mm_set1_epi32(4809);
    const __m128i by = _mm_set1_epi32(934);
    const __m128i ru = _mm_set1_epi32(1382);
    const __m128i gu = _mm_set1_epi32(2714);
    /* const __m128i bu = _mm_set1_epi32(4096); */
    /* const __m128i rv = _mm_set1_epi32(4096); */
    const __m128i gv = _mm_set1_epi32(3430);
    const __m128i bv = _mm_set1_epi32(666);
    const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096),
                             _MM_SHUFFLE(1, 0, 1, 0));
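
    /* _mm_mul_epi32 only multiplies the even 32-bit lanes, so each product is
     * computed twice: once for lanes 0/2 (lo) and once for lanes 1/3 (hi,
     * obtained by shuffling). mulround adds the 2^12 rounding bias in each
     * 64-bit lane before the shift by 13, and _mm_blend_epi16 re-interleaves
     * the two halves back into four 32-bit results. */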

    for (i = 0; i < (len & ~3U); i += 4) {
        __m128i lo, hi;
        __m128i y, u, v;
        __m128i r = _mm_load_si128((const __m128i *) & (c0[i]));
        __m128i g = _mm_load_si128((const __m128i *) & (c1[i]));
        __m128i b = _mm_load_si128((const __m128i *) & (c2[i]));

        lo = r;
        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, ry);
        hi = _mm_mul_epi32(hi, ry);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        y = _mm_blend_epi16(lo, hi, 0xCC);

        lo = g;
        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, gy);
        hi = _mm_mul_epi32(hi, gy);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));

        lo = b;
        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, by);
        hi = _mm_mul_epi32(hi, by);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
        _mm_store_si128((__m128i *) & (c0[i]), y);

        /*lo = b;
        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, mulround);
        hi = _mm_mul_epi32(hi, mulround);*/
        lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
        hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
        lo = _mm_slli_epi64(lo, 12);
        hi = _mm_slli_epi64(hi, 12);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        u = _mm_blend_epi16(lo, hi, 0xCC);

        lo = r;
        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, ru);
        hi = _mm_mul_epi32(hi, ru);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));

        lo = g;
        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, gu);
        hi = _mm_mul_epi32(hi, gu);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
        _mm_store_si128((__m128i *) & (c1[i]), u);

        /*lo = r;
        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, mulround);
        hi = _mm_mul_epi32(hi, mulround);*/
        lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
        hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
        lo = _mm_slli_epi64(lo, 12);
        hi = _mm_slli_epi64(hi, 12);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        v = _mm_blend_epi16(lo, hi, 0xCC);

        lo = g;
        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, gv);
        hi = _mm_mul_epi32(hi, gv);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));

        lo = b;
        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, bv);
        hi = _mm_mul_epi32(hi, bv);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
        _mm_store_si128((__m128i *) & (c2[i]), v);
    }
    for (; i < len; ++i) {
        OPJ_INT32 r = c0[i];
        OPJ_INT32 g = c1[i];
        OPJ_INT32 b = c2[i];
        OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
                       4809) + opj_int_fix_mul(b, 934);
        OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
                      2714) + opj_int_fix_mul(b, 4096);
        OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
                       3430) - opj_int_fix_mul(b, 666);
        c0[i] = y;
        c1[i] = u;
        c2[i] = v;
    }
}
#else
void opj_mct_encode_real(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    for (i = 0; i < n; ++i) {
        OPJ_INT32 r = c0[i];
        OPJ_INT32 g = c1[i];
        OPJ_INT32 b = c2[i];
        OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
                       4809) + opj_int_fix_mul(b, 934);
        OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
                      2714) + opj_int_fix_mul(b, 4096);
        OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
                       3430) - opj_int_fix_mul(b, 666);
        c0[i] = y;
        c1[i] = u;
        c2[i] = v;
    }
}
#endif

/* <summary> */
/* Inverse irreversible MCT. */
/* </summary> */
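/* Inverse ICT, in floating point:
 *   R = Y + 1.402*V,  G = Y - 0.34413*U - 0.71414*V,  B = Y + 1.772*U */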
void opj_mct_decode_real(
    OPJ_FLOAT32* OPJ_RESTRICT c0,
    OPJ_FLOAT32* OPJ_RESTRICT c1,
    OPJ_FLOAT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
#ifdef USE_SSE
    __m128 vrv, vgu, vgv, vbu;
    vrv = _mm_set1_ps(1.402f);
    vgu = _mm_set1_ps(0.34413f);
    vgv = _mm_set1_ps(0.71414f);
    vbu = _mm_set1_ps(1.772f);
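    /* The SSE path consumes 8 samples per iteration (two groups of 4) and
     * advances the component pointers as it goes; n &= 7 afterwards leaves
     * only the remainder for the scalar loop below. */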
    for (i = 0; i < (n >> 3); ++i) {
        __m128 vy, vu, vv;
        __m128 vr, vg, vb;

        vy = _mm_load_ps(c0);
        vu = _mm_load_ps(c1);
        vv = _mm_load_ps(c2);
        vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
        vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
        vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
        _mm_store_ps(c0, vr);
        _mm_store_ps(c1, vg);
        _mm_store_ps(c2, vb);
        c0 += 4;
        c1 += 4;
        c2 += 4;

        vy = _mm_load_ps(c0);
        vu = _mm_load_ps(c1);
        vv = _mm_load_ps(c2);
        vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
        vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
        vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
        _mm_store_ps(c0, vr);
        _mm_store_ps(c1, vg);
        _mm_store_ps(c2, vb);
        c0 += 4;
        c1 += 4;
        c2 += 4;
    }
    n &= 7;
#endif
    for (i = 0; i < n; ++i) {
        OPJ_FLOAT32 y = c0[i];
        OPJ_FLOAT32 u = c1[i];
        OPJ_FLOAT32 v = c2[i];
        OPJ_FLOAT32 r = y + (v * 1.402f);
        OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f));
        OPJ_FLOAT32 b = y + (u * 1.772f);
        c0[i] = r;
        c1[i] = g;
        c2[i] = b;
    }
}

/* <summary> */
/* Get norm of basis function of irreversible MCT. */
/* </summary> */
OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno)
{
    return opj_mct_norms_real[compno];
}

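/* Forward custom MCT: applies a user-supplied pNbComp x pNbComp matrix
 * (given as floats in pCodingdata and converted to 13-bit fixed point) to
 * each of the n samples, reading and writing in place through the
 * per-component pointers in pData. */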
OPJ_BOOL opj_mct_encode_custom(
    OPJ_BYTE * pCodingdata,
    OPJ_SIZE_T n,
    OPJ_BYTE ** pData,
    OPJ_UINT32 pNbComp,
    OPJ_UINT32 isSigned)
{
    OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;
    OPJ_SIZE_T i;
    OPJ_UINT32 j;
    OPJ_UINT32 k;
    OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;
    OPJ_INT32 * lCurrentData = 00;
    OPJ_INT32 * lCurrentMatrix = 00;
    OPJ_INT32 ** lData = (OPJ_INT32 **) pData;
    OPJ_UINT32 lMultiplicator = 1 << 13;
    OPJ_INT32 * lMctPtr;

    OPJ_ARG_NOT_USED(isSigned);

    lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof(
            OPJ_INT32));
    if (! lCurrentData) {
        return OPJ_FALSE;
    }

    lCurrentMatrix = lCurrentData + pNbComp;

    for (i = 0; i < lNbMatCoeff; ++i) {
        lCurrentMatrix[i] = (OPJ_INT32)(*(lMct++) * (OPJ_FLOAT32)lMultiplicator);
    }

    for (i = 0; i < n; ++i)  {
        lMctPtr = lCurrentMatrix;
        for (j = 0; j < pNbComp; ++j) {
            lCurrentData[j] = (*(lData[j]));
        }

        for (j = 0; j < pNbComp; ++j) {
            *(lData[j]) = 0;
            for (k = 0; k < pNbComp; ++k) {
                *(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]);
                ++lMctPtr;
            }

            ++lData[j];
        }
    }

    opj_free(lCurrentData);

    return OPJ_TRUE;
}

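/* Inverse custom MCT: same structure as above, but the matrix in
 * pDecodingData is applied in floating point to float component buffers. */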
OPJ_BOOL opj_mct_decode_custom(
    OPJ_BYTE * pDecodingData,
    OPJ_SIZE_T n,
    OPJ_BYTE ** pData,
    OPJ_UINT32 pNbComp,
    OPJ_UINT32 isSigned)
{
    OPJ_FLOAT32 * lMct;
    OPJ_SIZE_T i;
    OPJ_UINT32 j;
    OPJ_UINT32 k;

    OPJ_FLOAT32 * lCurrentData = 00;
    OPJ_FLOAT32 * lCurrentResult = 00;
    OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData;

    OPJ_ARG_NOT_USED(isSigned);

    lCurrentData = (OPJ_FLOAT32 *) opj_malloc(2 * pNbComp * sizeof(OPJ_FLOAT32));
    if (! lCurrentData) {
        return OPJ_FALSE;
    }
    lCurrentResult = lCurrentData + pNbComp;

    for (i = 0; i < n; ++i) {
        lMct = (OPJ_FLOAT32 *) pDecodingData;
        for (j = 0; j < pNbComp; ++j) {
            lCurrentData[j] = (OPJ_FLOAT32)(*(lData[j]));
        }
        for (j = 0; j < pNbComp; ++j) {
            lCurrentResult[j] = 0;
            for (k = 0; k < pNbComp; ++k) {
                lCurrentResult[j] += *(lMct++) * lCurrentData[k];
            }
            *(lData[j]++) = (OPJ_FLOAT32)(lCurrentResult[j]);
        }
    }
    opj_free(lCurrentData);
    return OPJ_TRUE;
}

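/* Computes, for each component, the L2 norm of the corresponding column of
 * the pNbComps x pNbComps transform matrix (stored row-major, hence the
 * lIndex += pNbComps stride). */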
void opj_calculate_norms(OPJ_FLOAT64 * pNorms,
                         OPJ_UINT32 pNbComps,
                         OPJ_FLOAT32 * pMatrix)
{
    OPJ_UINT32 i, j, lIndex;
    OPJ_FLOAT32 lCurrentValue;
    OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms;
    OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix;

    for (i = 0; i < pNbComps; ++i) {
        lNorms[i] = 0;
        lIndex = i;

        for (j = 0; j < pNbComps; ++j) {
            lCurrentValue = lMatrix[lIndex];
            lIndex += pNbComps;
            lNorms[i] += lCurrentValue * lCurrentValue;
        }
        lNorms[i] = sqrt(lNorms[i]);
    }
}