1 /*
2 * jsimd_x86_64.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright 2009-2011, 2014 D. R. Commander
6 *
7 * Based on the x86 SIMD extension for IJG JPEG library,
8 * Copyright (C) 1999-2006, MIYASAKA Masaru.
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc
10 *
11 * This file contains the interface between the "normal" portions
12 * of the library and the SIMD implementations when running on a
13 * 64-bit x86 architecture.
14 */
15
16 #define JPEG_INTERNALS
17 #include "../jinclude.h"
18 #include "../jpeglib.h"
19 #include "../jsimd.h"
20 #include "../jdct.h"
21 #include "../jsimddct.h"
22 #include "jsimd.h"
23
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) evaluates to 1 when the address value of ptr is
 * a multiple of 2^order, and to 0 otherwise.  Both arguments are fully
 * parenthesized so that compound expressions (e.g. "a | b" or "c ? x : y")
 * are evaluated as a unit before the cast / shift is applied, and the
 * mask is built in size_t so the shift is not limited to the width of int.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
31
32 GLOBAL(int)
jsimd_can_rgb_ycc(void)33 jsimd_can_rgb_ycc (void)
34 {
35 /* The code is optimised for these values only */
36 if (BITS_IN_JSAMPLE != 8)
37 return 0;
38 if (sizeof(JDIMENSION) != 4)
39 return 0;
40 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
41 return 0;
42
43 if (!IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
44 return 0;
45
46 return 1;
47 }
48
49 GLOBAL(int)
jsimd_can_rgb_gray(void)50 jsimd_can_rgb_gray (void)
51 {
52 /* The code is optimised for these values only */
53 if (BITS_IN_JSAMPLE != 8)
54 return 0;
55 if (sizeof(JDIMENSION) != 4)
56 return 0;
57 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
58 return 0;
59
60 if (!IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
61 return 0;
62
63 return 1;
64 }
65
66 GLOBAL(int)
jsimd_can_ycc_rgb(void)67 jsimd_can_ycc_rgb (void)
68 {
69 /* The code is optimised for these values only */
70 if (BITS_IN_JSAMPLE != 8)
71 return 0;
72 if (sizeof(JDIMENSION) != 4)
73 return 0;
74 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
75 return 0;
76
77 if (!IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
78 return 0;
79
80 return 1;
81 }
82
83 GLOBAL(int)
jsimd_can_ycc_rgb565(void)84 jsimd_can_ycc_rgb565 (void)
85 {
86 return 0;
87 }
88
89 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)90 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
91 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
92 JDIMENSION output_row, int num_rows)
93 {
94 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
95
96 switch(cinfo->in_color_space) {
97 case JCS_EXT_RGB:
98 sse2fct=jsimd_extrgb_ycc_convert_sse2;
99 break;
100 case JCS_EXT_RGBX:
101 case JCS_EXT_RGBA:
102 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
103 break;
104 case JCS_EXT_BGR:
105 sse2fct=jsimd_extbgr_ycc_convert_sse2;
106 break;
107 case JCS_EXT_BGRX:
108 case JCS_EXT_BGRA:
109 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
110 break;
111 case JCS_EXT_XBGR:
112 case JCS_EXT_ABGR:
113 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
114 break;
115 case JCS_EXT_XRGB:
116 case JCS_EXT_ARGB:
117 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
118 break;
119 default:
120 sse2fct=jsimd_rgb_ycc_convert_sse2;
121 break;
122 }
123
124 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
125 }
126
127 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)128 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
129 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
130 JDIMENSION output_row, int num_rows)
131 {
132 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
133
134 switch(cinfo->in_color_space) {
135 case JCS_EXT_RGB:
136 sse2fct=jsimd_extrgb_gray_convert_sse2;
137 break;
138 case JCS_EXT_RGBX:
139 case JCS_EXT_RGBA:
140 sse2fct=jsimd_extrgbx_gray_convert_sse2;
141 break;
142 case JCS_EXT_BGR:
143 sse2fct=jsimd_extbgr_gray_convert_sse2;
144 break;
145 case JCS_EXT_BGRX:
146 case JCS_EXT_BGRA:
147 sse2fct=jsimd_extbgrx_gray_convert_sse2;
148 break;
149 case JCS_EXT_XBGR:
150 case JCS_EXT_ABGR:
151 sse2fct=jsimd_extxbgr_gray_convert_sse2;
152 break;
153 case JCS_EXT_XRGB:
154 case JCS_EXT_ARGB:
155 sse2fct=jsimd_extxrgb_gray_convert_sse2;
156 break;
157 default:
158 sse2fct=jsimd_rgb_gray_convert_sse2;
159 break;
160 }
161
162 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
163 }
164
165 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)166 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
167 JSAMPIMAGE input_buf, JDIMENSION input_row,
168 JSAMPARRAY output_buf, int num_rows)
169 {
170 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
171
172 switch(cinfo->out_color_space) {
173 case JCS_EXT_RGB:
174 sse2fct=jsimd_ycc_extrgb_convert_sse2;
175 break;
176 case JCS_EXT_RGBX:
177 case JCS_EXT_RGBA:
178 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
179 break;
180 case JCS_EXT_BGR:
181 sse2fct=jsimd_ycc_extbgr_convert_sse2;
182 break;
183 case JCS_EXT_BGRX:
184 case JCS_EXT_BGRA:
185 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
186 break;
187 case JCS_EXT_XBGR:
188 case JCS_EXT_ABGR:
189 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
190 break;
191 case JCS_EXT_XRGB:
192 case JCS_EXT_ARGB:
193 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
194 break;
195 default:
196 sse2fct=jsimd_ycc_rgb_convert_sse2;
197 break;
198 }
199
200 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
201 }
202
203 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)204 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
205 JSAMPIMAGE input_buf, JDIMENSION input_row,
206 JSAMPARRAY output_buf, int num_rows)
207 {
208 }
209
210 GLOBAL(int)
jsimd_can_h2v2_downsample(void)211 jsimd_can_h2v2_downsample (void)
212 {
213 /* The code is optimised for these values only */
214 if (BITS_IN_JSAMPLE != 8)
215 return 0;
216 if (sizeof(JDIMENSION) != 4)
217 return 0;
218
219 return 1;
220 }
221
222 GLOBAL(int)
jsimd_can_h2v1_downsample(void)223 jsimd_can_h2v1_downsample (void)
224 {
225 /* The code is optimised for these values only */
226 if (BITS_IN_JSAMPLE != 8)
227 return 0;
228 if (sizeof(JDIMENSION) != 4)
229 return 0;
230
231 return 1;
232 }
233
234 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)235 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
236 JSAMPARRAY input_data, JSAMPARRAY output_data)
237 {
238 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
239 compptr->v_samp_factor, compptr->width_in_blocks,
240 input_data, output_data);
241 }
242
243 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)244 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
245 JSAMPARRAY input_data, JSAMPARRAY output_data)
246 {
247 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
248 compptr->v_samp_factor, compptr->width_in_blocks,
249 input_data, output_data);
250 }
251
252 GLOBAL(int)
jsimd_can_h2v2_upsample(void)253 jsimd_can_h2v2_upsample (void)
254 {
255 /* The code is optimised for these values only */
256 if (BITS_IN_JSAMPLE != 8)
257 return 0;
258 if (sizeof(JDIMENSION) != 4)
259 return 0;
260
261 return 1;
262 }
263
264 GLOBAL(int)
jsimd_can_h2v1_upsample(void)265 jsimd_can_h2v1_upsample (void)
266 {
267 /* The code is optimised for these values only */
268 if (BITS_IN_JSAMPLE != 8)
269 return 0;
270 if (sizeof(JDIMENSION) != 4)
271 return 0;
272
273 return 1;
274 }
275
276 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)277 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
278 jpeg_component_info * compptr,
279 JSAMPARRAY input_data,
280 JSAMPARRAY * output_data_ptr)
281 {
282 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
283 input_data, output_data_ptr);
284 }
285
286 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)287 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
288 jpeg_component_info * compptr,
289 JSAMPARRAY input_data,
290 JSAMPARRAY * output_data_ptr)
291 {
292 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
293 input_data, output_data_ptr);
294 }
295
296 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)297 jsimd_can_h2v2_fancy_upsample (void)
298 {
299 /* The code is optimised for these values only */
300 if (BITS_IN_JSAMPLE != 8)
301 return 0;
302 if (sizeof(JDIMENSION) != 4)
303 return 0;
304
305 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
306 return 0;
307
308 return 1;
309 }
310
311 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)312 jsimd_can_h2v1_fancy_upsample (void)
313 {
314 /* The code is optimised for these values only */
315 if (BITS_IN_JSAMPLE != 8)
316 return 0;
317 if (sizeof(JDIMENSION) != 4)
318 return 0;
319
320 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
321 return 0;
322
323 return 1;
324 }
325
326 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)327 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
328 jpeg_component_info * compptr,
329 JSAMPARRAY input_data,
330 JSAMPARRAY * output_data_ptr)
331 {
332 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
333 compptr->downsampled_width, input_data,
334 output_data_ptr);
335 }
336
337 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)338 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
339 jpeg_component_info * compptr,
340 JSAMPARRAY input_data,
341 JSAMPARRAY * output_data_ptr)
342 {
343 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
344 compptr->downsampled_width, input_data,
345 output_data_ptr);
346 }
347
348 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)349 jsimd_can_h2v2_merged_upsample (void)
350 {
351 /* The code is optimised for these values only */
352 if (BITS_IN_JSAMPLE != 8)
353 return 0;
354 if (sizeof(JDIMENSION) != 4)
355 return 0;
356
357 if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
358 return 0;
359
360 return 1;
361 }
362
363 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)364 jsimd_can_h2v1_merged_upsample (void)
365 {
366 /* The code is optimised for these values only */
367 if (BITS_IN_JSAMPLE != 8)
368 return 0;
369 if (sizeof(JDIMENSION) != 4)
370 return 0;
371
372 if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
373 return 0;
374
375 return 1;
376 }
377
378 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)379 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
380 JSAMPIMAGE input_buf,
381 JDIMENSION in_row_group_ctr,
382 JSAMPARRAY output_buf)
383 {
384 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
385
386 switch(cinfo->out_color_space) {
387 case JCS_EXT_RGB:
388 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
389 break;
390 case JCS_EXT_RGBX:
391 case JCS_EXT_RGBA:
392 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
393 break;
394 case JCS_EXT_BGR:
395 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
396 break;
397 case JCS_EXT_BGRX:
398 case JCS_EXT_BGRA:
399 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
400 break;
401 case JCS_EXT_XBGR:
402 case JCS_EXT_ABGR:
403 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
404 break;
405 case JCS_EXT_XRGB:
406 case JCS_EXT_ARGB:
407 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
408 break;
409 default:
410 sse2fct=jsimd_h2v2_merged_upsample_sse2;
411 break;
412 }
413
414 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
415 }
416
417 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)418 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
419 JSAMPIMAGE input_buf,
420 JDIMENSION in_row_group_ctr,
421 JSAMPARRAY output_buf)
422 {
423 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
424
425 switch(cinfo->out_color_space) {
426 case JCS_EXT_RGB:
427 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
428 break;
429 case JCS_EXT_RGBX:
430 case JCS_EXT_RGBA:
431 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
432 break;
433 case JCS_EXT_BGR:
434 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
435 break;
436 case JCS_EXT_BGRX:
437 case JCS_EXT_BGRA:
438 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
439 break;
440 case JCS_EXT_XBGR:
441 case JCS_EXT_ABGR:
442 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
443 break;
444 case JCS_EXT_XRGB:
445 case JCS_EXT_ARGB:
446 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
447 break;
448 default:
449 sse2fct=jsimd_h2v1_merged_upsample_sse2;
450 break;
451 }
452
453 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
454 }
455
456 GLOBAL(int)
jsimd_can_convsamp(void)457 jsimd_can_convsamp (void)
458 {
459 /* The code is optimised for these values only */
460 if (DCTSIZE != 8)
461 return 0;
462 if (BITS_IN_JSAMPLE != 8)
463 return 0;
464 if (sizeof(JDIMENSION) != 4)
465 return 0;
466 if (sizeof(DCTELEM) != 2)
467 return 0;
468
469 return 1;
470 }
471
472 GLOBAL(int)
jsimd_can_convsamp_float(void)473 jsimd_can_convsamp_float (void)
474 {
475 /* The code is optimised for these values only */
476 if (DCTSIZE != 8)
477 return 0;
478 if (BITS_IN_JSAMPLE != 8)
479 return 0;
480 if (sizeof(JDIMENSION) != 4)
481 return 0;
482 if (sizeof(FAST_FLOAT) != 4)
483 return 0;
484
485 return 1;
486 }
487
488 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)489 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
490 DCTELEM * workspace)
491 {
492 jsimd_convsamp_sse2(sample_data, start_col, workspace);
493 }
494
495 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)496 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
497 FAST_FLOAT * workspace)
498 {
499 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
500 }
501
502 GLOBAL(int)
jsimd_can_fdct_islow(void)503 jsimd_can_fdct_islow (void)
504 {
505 /* The code is optimised for these values only */
506 if (DCTSIZE != 8)
507 return 0;
508 if (sizeof(DCTELEM) != 2)
509 return 0;
510
511 if (!IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
512 return 0;
513
514 return 1;
515 }
516
517 GLOBAL(int)
jsimd_can_fdct_ifast(void)518 jsimd_can_fdct_ifast (void)
519 {
520 /* The code is optimised for these values only */
521 if (DCTSIZE != 8)
522 return 0;
523 if (sizeof(DCTELEM) != 2)
524 return 0;
525
526 if (!IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
527 return 0;
528
529 return 1;
530 }
531
532 GLOBAL(int)
jsimd_can_fdct_float(void)533 jsimd_can_fdct_float (void)
534 {
535 /* The code is optimised for these values only */
536 if (DCTSIZE != 8)
537 return 0;
538 if (sizeof(FAST_FLOAT) != 4)
539 return 0;
540
541 if (!IS_ALIGNED_SSE(jconst_fdct_float_sse))
542 return 0;
543
544 return 1;
545 }
546
547 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)548 jsimd_fdct_islow (DCTELEM * data)
549 {
550 jsimd_fdct_islow_sse2(data);
551 }
552
553 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)554 jsimd_fdct_ifast (DCTELEM * data)
555 {
556 jsimd_fdct_ifast_sse2(data);
557 }
558
559 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)560 jsimd_fdct_float (FAST_FLOAT * data)
561 {
562 jsimd_fdct_float_sse(data);
563 }
564
565 GLOBAL(int)
jsimd_can_quantize(void)566 jsimd_can_quantize (void)
567 {
568 /* The code is optimised for these values only */
569 if (DCTSIZE != 8)
570 return 0;
571 if (sizeof(JCOEF) != 2)
572 return 0;
573 if (sizeof(DCTELEM) != 2)
574 return 0;
575
576 return 1;
577 }
578
579 GLOBAL(int)
jsimd_can_quantize_float(void)580 jsimd_can_quantize_float (void)
581 {
582 /* The code is optimised for these values only */
583 if (DCTSIZE != 8)
584 return 0;
585 if (sizeof(JCOEF) != 2)
586 return 0;
587 if (sizeof(FAST_FLOAT) != 4)
588 return 0;
589
590 return 1;
591 }
592
593 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)594 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
595 DCTELEM * workspace)
596 {
597 jsimd_quantize_sse2(coef_block, divisors, workspace);
598 }
599
600 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)601 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
602 FAST_FLOAT * workspace)
603 {
604 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
605 }
606
607 GLOBAL(int)
jsimd_can_idct_2x2(void)608 jsimd_can_idct_2x2 (void)
609 {
610 /* The code is optimised for these values only */
611 if (DCTSIZE != 8)
612 return 0;
613 if (sizeof(JCOEF) != 2)
614 return 0;
615 if (BITS_IN_JSAMPLE != 8)
616 return 0;
617 if (sizeof(JDIMENSION) != 4)
618 return 0;
619 if (sizeof(ISLOW_MULT_TYPE) != 2)
620 return 0;
621
622 if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
623 return 0;
624
625 return 1;
626 }
627
628 GLOBAL(int)
jsimd_can_idct_4x4(void)629 jsimd_can_idct_4x4 (void)
630 {
631 /* The code is optimised for these values only */
632 if (DCTSIZE != 8)
633 return 0;
634 if (sizeof(JCOEF) != 2)
635 return 0;
636 if (BITS_IN_JSAMPLE != 8)
637 return 0;
638 if (sizeof(JDIMENSION) != 4)
639 return 0;
640 if (sizeof(ISLOW_MULT_TYPE) != 2)
641 return 0;
642
643 if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
644 return 0;
645
646 return 1;
647 }
648
649 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)650 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
651 JCOEFPTR coef_block, JSAMPARRAY output_buf,
652 JDIMENSION output_col)
653 {
654 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
655 }
656
657 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)658 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
659 JCOEFPTR coef_block, JSAMPARRAY output_buf,
660 JDIMENSION output_col)
661 {
662 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
663 }
664
665 GLOBAL(int)
jsimd_can_idct_islow(void)666 jsimd_can_idct_islow (void)
667 {
668 /* The code is optimised for these values only */
669 if (DCTSIZE != 8)
670 return 0;
671 if (sizeof(JCOEF) != 2)
672 return 0;
673 if (BITS_IN_JSAMPLE != 8)
674 return 0;
675 if (sizeof(JDIMENSION) != 4)
676 return 0;
677 if (sizeof(ISLOW_MULT_TYPE) != 2)
678 return 0;
679
680 if (!IS_ALIGNED_SSE(jconst_idct_islow_sse2))
681 return 0;
682
683 return 1;
684 }
685
686 GLOBAL(int)
jsimd_can_idct_ifast(void)687 jsimd_can_idct_ifast (void)
688 {
689 /* The code is optimised for these values only */
690 if (DCTSIZE != 8)
691 return 0;
692 if (sizeof(JCOEF) != 2)
693 return 0;
694 if (BITS_IN_JSAMPLE != 8)
695 return 0;
696 if (sizeof(JDIMENSION) != 4)
697 return 0;
698 if (sizeof(IFAST_MULT_TYPE) != 2)
699 return 0;
700 if (IFAST_SCALE_BITS != 2)
701 return 0;
702
703 if (!IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
704 return 0;
705
706 return 1;
707 }
708
709 GLOBAL(int)
jsimd_can_idct_float(void)710 jsimd_can_idct_float (void)
711 {
712 if (DCTSIZE != 8)
713 return 0;
714 if (sizeof(JCOEF) != 2)
715 return 0;
716 if (BITS_IN_JSAMPLE != 8)
717 return 0;
718 if (sizeof(JDIMENSION) != 4)
719 return 0;
720 if (sizeof(FAST_FLOAT) != 4)
721 return 0;
722 if (sizeof(FLOAT_MULT_TYPE) != 4)
723 return 0;
724
725 if (!IS_ALIGNED_SSE(jconst_idct_float_sse2))
726 return 0;
727
728 return 1;
729 }
730
731 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)732 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
733 JCOEFPTR coef_block, JSAMPARRAY output_buf,
734 JDIMENSION output_col)
735 {
736 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
737 output_col);
738 }
739
740 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)741 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
742 JCOEFPTR coef_block, JSAMPARRAY output_buf,
743 JDIMENSION output_col)
744 {
745 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
746 output_col);
747 }
748
749 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)750 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
751 JCOEFPTR coef_block, JSAMPARRAY output_buf,
752 JDIMENSION output_col)
753 {
754 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
755 output_col);
756 }
757
758