1 /*
2  * jsimd_x86_64.c
3  *
4  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5  * Copyright 2009-2011, 2014 D. R. Commander
6  *
7  * Based on the x86 SIMD extension for IJG JPEG library,
8  * Copyright (C) 1999-2006, MIYASAKA Masaru.
9  * For conditions of distribution and use, see copyright notice in jsimdext.inc
10  *
11  * This file contains the interface between the "normal" portions
12  * of the library and the SIMD implementations when running on a
13  * 64-bit x86 architecture.
14  */
15 
16 #define JPEG_INTERNALS
17 #include "../jinclude.h"
18 #include "../jpeglib.h"
19 #include "../jsimd.h"
20 #include "../jdct.h"
21 #include "../jsimddct.h"
22 #include "jsimd.h"
23 
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address (or integer) ptr is a
 * multiple of 2^order.  Both arguments are fully parenthesized so that
 * expressions such as "base | offset" or "cond ? a : b" can be passed
 * safely, and the mask is built in size_t so that the shift is not limited
 * to the width of int.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
31 
32 GLOBAL(int)
jsimd_can_rgb_ycc(void)33 jsimd_can_rgb_ycc (void)
34 {
35   /* The code is optimised for these values only */
36   if (BITS_IN_JSAMPLE != 8)
37     return 0;
38   if (sizeof(JDIMENSION) != 4)
39     return 0;
40   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
41     return 0;
42 
43   if (!IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
44     return 0;
45 
46   return 1;
47 }
48 
49 GLOBAL(int)
jsimd_can_rgb_gray(void)50 jsimd_can_rgb_gray (void)
51 {
52   /* The code is optimised for these values only */
53   if (BITS_IN_JSAMPLE != 8)
54     return 0;
55   if (sizeof(JDIMENSION) != 4)
56     return 0;
57   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
58     return 0;
59 
60   if (!IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
61     return 0;
62 
63   return 1;
64 }
65 
66 GLOBAL(int)
jsimd_can_ycc_rgb(void)67 jsimd_can_ycc_rgb (void)
68 {
69   /* The code is optimised for these values only */
70   if (BITS_IN_JSAMPLE != 8)
71     return 0;
72   if (sizeof(JDIMENSION) != 4)
73     return 0;
74   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
75     return 0;
76 
77   if (!IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
78     return 0;
79 
80   return 1;
81 }
82 
83 GLOBAL(int)
jsimd_can_ycc_rgb565(void)84 jsimd_can_ycc_rgb565 (void)
85 {
86   return 0;
87 }
88 
89 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)90 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
91                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
92                        JDIMENSION output_row, int num_rows)
93 {
94   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
95 
96   switch(cinfo->in_color_space) {
97     case JCS_EXT_RGB:
98       sse2fct=jsimd_extrgb_ycc_convert_sse2;
99       break;
100     case JCS_EXT_RGBX:
101     case JCS_EXT_RGBA:
102       sse2fct=jsimd_extrgbx_ycc_convert_sse2;
103       break;
104     case JCS_EXT_BGR:
105       sse2fct=jsimd_extbgr_ycc_convert_sse2;
106       break;
107     case JCS_EXT_BGRX:
108     case JCS_EXT_BGRA:
109       sse2fct=jsimd_extbgrx_ycc_convert_sse2;
110       break;
111     case JCS_EXT_XBGR:
112     case JCS_EXT_ABGR:
113       sse2fct=jsimd_extxbgr_ycc_convert_sse2;
114       break;
115     case JCS_EXT_XRGB:
116     case JCS_EXT_ARGB:
117       sse2fct=jsimd_extxrgb_ycc_convert_sse2;
118       break;
119     default:
120       sse2fct=jsimd_rgb_ycc_convert_sse2;
121       break;
122   }
123 
124   sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
125 }
126 
127 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)128 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
129                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
130                         JDIMENSION output_row, int num_rows)
131 {
132   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
133 
134   switch(cinfo->in_color_space) {
135     case JCS_EXT_RGB:
136       sse2fct=jsimd_extrgb_gray_convert_sse2;
137       break;
138     case JCS_EXT_RGBX:
139     case JCS_EXT_RGBA:
140       sse2fct=jsimd_extrgbx_gray_convert_sse2;
141       break;
142     case JCS_EXT_BGR:
143       sse2fct=jsimd_extbgr_gray_convert_sse2;
144       break;
145     case JCS_EXT_BGRX:
146     case JCS_EXT_BGRA:
147       sse2fct=jsimd_extbgrx_gray_convert_sse2;
148       break;
149     case JCS_EXT_XBGR:
150     case JCS_EXT_ABGR:
151       sse2fct=jsimd_extxbgr_gray_convert_sse2;
152       break;
153     case JCS_EXT_XRGB:
154     case JCS_EXT_ARGB:
155       sse2fct=jsimd_extxrgb_gray_convert_sse2;
156       break;
157     default:
158       sse2fct=jsimd_rgb_gray_convert_sse2;
159       break;
160   }
161 
162   sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
163 }
164 
165 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)166 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
167                        JSAMPIMAGE input_buf, JDIMENSION input_row,
168                        JSAMPARRAY output_buf, int num_rows)
169 {
170   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
171 
172   switch(cinfo->out_color_space) {
173     case JCS_EXT_RGB:
174       sse2fct=jsimd_ycc_extrgb_convert_sse2;
175       break;
176     case JCS_EXT_RGBX:
177     case JCS_EXT_RGBA:
178       sse2fct=jsimd_ycc_extrgbx_convert_sse2;
179       break;
180     case JCS_EXT_BGR:
181       sse2fct=jsimd_ycc_extbgr_convert_sse2;
182       break;
183     case JCS_EXT_BGRX:
184     case JCS_EXT_BGRA:
185       sse2fct=jsimd_ycc_extbgrx_convert_sse2;
186       break;
187     case JCS_EXT_XBGR:
188     case JCS_EXT_ABGR:
189       sse2fct=jsimd_ycc_extxbgr_convert_sse2;
190       break;
191     case JCS_EXT_XRGB:
192     case JCS_EXT_ARGB:
193       sse2fct=jsimd_ycc_extxrgb_convert_sse2;
194       break;
195     default:
196       sse2fct=jsimd_ycc_rgb_convert_sse2;
197       break;
198   }
199 
200   sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
201 }
202 
203 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)204 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
205                           JSAMPIMAGE input_buf, JDIMENSION input_row,
206                           JSAMPARRAY output_buf, int num_rows)
207 {
208 }
209 
210 GLOBAL(int)
jsimd_can_h2v2_downsample(void)211 jsimd_can_h2v2_downsample (void)
212 {
213   /* The code is optimised for these values only */
214   if (BITS_IN_JSAMPLE != 8)
215     return 0;
216   if (sizeof(JDIMENSION) != 4)
217     return 0;
218 
219   return 1;
220 }
221 
222 GLOBAL(int)
jsimd_can_h2v1_downsample(void)223 jsimd_can_h2v1_downsample (void)
224 {
225   /* The code is optimised for these values only */
226   if (BITS_IN_JSAMPLE != 8)
227     return 0;
228   if (sizeof(JDIMENSION) != 4)
229     return 0;
230 
231   return 1;
232 }
233 
234 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)235 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
236                        JSAMPARRAY input_data, JSAMPARRAY output_data)
237 {
238   jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
239                              compptr->v_samp_factor, compptr->width_in_blocks,
240                              input_data, output_data);
241 }
242 
243 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)244 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
245                        JSAMPARRAY input_data, JSAMPARRAY output_data)
246 {
247   jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
248                              compptr->v_samp_factor, compptr->width_in_blocks,
249                              input_data, output_data);
250 }
251 
252 GLOBAL(int)
jsimd_can_h2v2_upsample(void)253 jsimd_can_h2v2_upsample (void)
254 {
255   /* The code is optimised for these values only */
256   if (BITS_IN_JSAMPLE != 8)
257     return 0;
258   if (sizeof(JDIMENSION) != 4)
259     return 0;
260 
261   return 1;
262 }
263 
264 GLOBAL(int)
jsimd_can_h2v1_upsample(void)265 jsimd_can_h2v1_upsample (void)
266 {
267   /* The code is optimised for these values only */
268   if (BITS_IN_JSAMPLE != 8)
269     return 0;
270   if (sizeof(JDIMENSION) != 4)
271     return 0;
272 
273   return 1;
274 }
275 
276 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)277 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
278                      jpeg_component_info * compptr,
279                      JSAMPARRAY input_data,
280                      JSAMPARRAY * output_data_ptr)
281 {
282   jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
283                            input_data, output_data_ptr);
284 }
285 
286 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)287 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
288                      jpeg_component_info * compptr,
289                      JSAMPARRAY input_data,
290                      JSAMPARRAY * output_data_ptr)
291 {
292   jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
293                            input_data, output_data_ptr);
294 }
295 
296 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)297 jsimd_can_h2v2_fancy_upsample (void)
298 {
299   /* The code is optimised for these values only */
300   if (BITS_IN_JSAMPLE != 8)
301     return 0;
302   if (sizeof(JDIMENSION) != 4)
303     return 0;
304 
305   if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
306     return 0;
307 
308   return 1;
309 }
310 
311 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)312 jsimd_can_h2v1_fancy_upsample (void)
313 {
314   /* The code is optimised for these values only */
315   if (BITS_IN_JSAMPLE != 8)
316     return 0;
317   if (sizeof(JDIMENSION) != 4)
318     return 0;
319 
320   if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
321     return 0;
322 
323   return 1;
324 }
325 
326 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)327 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
328                            jpeg_component_info * compptr,
329                            JSAMPARRAY input_data,
330                            JSAMPARRAY * output_data_ptr)
331 {
332   jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
333                                  compptr->downsampled_width, input_data,
334                                  output_data_ptr);
335 }
336 
337 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)338 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
339                            jpeg_component_info * compptr,
340                            JSAMPARRAY input_data,
341                            JSAMPARRAY * output_data_ptr)
342 {
343   jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
344                                  compptr->downsampled_width, input_data,
345                                  output_data_ptr);
346 }
347 
348 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)349 jsimd_can_h2v2_merged_upsample (void)
350 {
351   /* The code is optimised for these values only */
352   if (BITS_IN_JSAMPLE != 8)
353     return 0;
354   if (sizeof(JDIMENSION) != 4)
355     return 0;
356 
357   if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
358     return 0;
359 
360   return 1;
361 }
362 
363 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)364 jsimd_can_h2v1_merged_upsample (void)
365 {
366   /* The code is optimised for these values only */
367   if (BITS_IN_JSAMPLE != 8)
368     return 0;
369   if (sizeof(JDIMENSION) != 4)
370     return 0;
371 
372   if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
373     return 0;
374 
375   return 1;
376 }
377 
378 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)379 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
380                             JSAMPIMAGE input_buf,
381                             JDIMENSION in_row_group_ctr,
382                             JSAMPARRAY output_buf)
383 {
384   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
385 
386   switch(cinfo->out_color_space) {
387     case JCS_EXT_RGB:
388       sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
389       break;
390     case JCS_EXT_RGBX:
391     case JCS_EXT_RGBA:
392       sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
393       break;
394     case JCS_EXT_BGR:
395       sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
396       break;
397     case JCS_EXT_BGRX:
398     case JCS_EXT_BGRA:
399       sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
400       break;
401     case JCS_EXT_XBGR:
402     case JCS_EXT_ABGR:
403       sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
404       break;
405     case JCS_EXT_XRGB:
406     case JCS_EXT_ARGB:
407       sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
408       break;
409     default:
410       sse2fct=jsimd_h2v2_merged_upsample_sse2;
411       break;
412   }
413 
414   sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
415 }
416 
417 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)418 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
419                             JSAMPIMAGE input_buf,
420                             JDIMENSION in_row_group_ctr,
421                             JSAMPARRAY output_buf)
422 {
423   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
424 
425   switch(cinfo->out_color_space) {
426     case JCS_EXT_RGB:
427       sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
428       break;
429     case JCS_EXT_RGBX:
430     case JCS_EXT_RGBA:
431       sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
432       break;
433     case JCS_EXT_BGR:
434       sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
435       break;
436     case JCS_EXT_BGRX:
437     case JCS_EXT_BGRA:
438       sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
439       break;
440     case JCS_EXT_XBGR:
441     case JCS_EXT_ABGR:
442       sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
443       break;
444     case JCS_EXT_XRGB:
445     case JCS_EXT_ARGB:
446       sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
447       break;
448     default:
449       sse2fct=jsimd_h2v1_merged_upsample_sse2;
450       break;
451   }
452 
453   sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
454 }
455 
456 GLOBAL(int)
jsimd_can_convsamp(void)457 jsimd_can_convsamp (void)
458 {
459   /* The code is optimised for these values only */
460   if (DCTSIZE != 8)
461     return 0;
462   if (BITS_IN_JSAMPLE != 8)
463     return 0;
464   if (sizeof(JDIMENSION) != 4)
465     return 0;
466   if (sizeof(DCTELEM) != 2)
467     return 0;
468 
469   return 1;
470 }
471 
472 GLOBAL(int)
jsimd_can_convsamp_float(void)473 jsimd_can_convsamp_float (void)
474 {
475   /* The code is optimised for these values only */
476   if (DCTSIZE != 8)
477     return 0;
478   if (BITS_IN_JSAMPLE != 8)
479     return 0;
480   if (sizeof(JDIMENSION) != 4)
481     return 0;
482   if (sizeof(FAST_FLOAT) != 4)
483     return 0;
484 
485   return 1;
486 }
487 
488 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)489 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
490                 DCTELEM * workspace)
491 {
492   jsimd_convsamp_sse2(sample_data, start_col, workspace);
493 }
494 
495 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)496 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
497                       FAST_FLOAT * workspace)
498 {
499   jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
500 }
501 
502 GLOBAL(int)
jsimd_can_fdct_islow(void)503 jsimd_can_fdct_islow (void)
504 {
505   /* The code is optimised for these values only */
506   if (DCTSIZE != 8)
507     return 0;
508   if (sizeof(DCTELEM) != 2)
509     return 0;
510 
511   if (!IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
512     return 0;
513 
514   return 1;
515 }
516 
517 GLOBAL(int)
jsimd_can_fdct_ifast(void)518 jsimd_can_fdct_ifast (void)
519 {
520   /* The code is optimised for these values only */
521   if (DCTSIZE != 8)
522     return 0;
523   if (sizeof(DCTELEM) != 2)
524     return 0;
525 
526   if (!IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
527     return 0;
528 
529   return 1;
530 }
531 
532 GLOBAL(int)
jsimd_can_fdct_float(void)533 jsimd_can_fdct_float (void)
534 {
535   /* The code is optimised for these values only */
536   if (DCTSIZE != 8)
537     return 0;
538   if (sizeof(FAST_FLOAT) != 4)
539     return 0;
540 
541   if (!IS_ALIGNED_SSE(jconst_fdct_float_sse))
542     return 0;
543 
544   return 1;
545 }
546 
547 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)548 jsimd_fdct_islow (DCTELEM * data)
549 {
550   jsimd_fdct_islow_sse2(data);
551 }
552 
553 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)554 jsimd_fdct_ifast (DCTELEM * data)
555 {
556   jsimd_fdct_ifast_sse2(data);
557 }
558 
559 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)560 jsimd_fdct_float (FAST_FLOAT * data)
561 {
562   jsimd_fdct_float_sse(data);
563 }
564 
565 GLOBAL(int)
jsimd_can_quantize(void)566 jsimd_can_quantize (void)
567 {
568   /* The code is optimised for these values only */
569   if (DCTSIZE != 8)
570     return 0;
571   if (sizeof(JCOEF) != 2)
572     return 0;
573   if (sizeof(DCTELEM) != 2)
574     return 0;
575 
576   return 1;
577 }
578 
579 GLOBAL(int)
jsimd_can_quantize_float(void)580 jsimd_can_quantize_float (void)
581 {
582   /* The code is optimised for these values only */
583   if (DCTSIZE != 8)
584     return 0;
585   if (sizeof(JCOEF) != 2)
586     return 0;
587   if (sizeof(FAST_FLOAT) != 4)
588     return 0;
589 
590   return 1;
591 }
592 
593 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)594 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
595                 DCTELEM * workspace)
596 {
597   jsimd_quantize_sse2(coef_block, divisors, workspace);
598 }
599 
600 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)601 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
602                       FAST_FLOAT * workspace)
603 {
604   jsimd_quantize_float_sse2(coef_block, divisors, workspace);
605 }
606 
607 GLOBAL(int)
jsimd_can_idct_2x2(void)608 jsimd_can_idct_2x2 (void)
609 {
610   /* The code is optimised for these values only */
611   if (DCTSIZE != 8)
612     return 0;
613   if (sizeof(JCOEF) != 2)
614     return 0;
615   if (BITS_IN_JSAMPLE != 8)
616     return 0;
617   if (sizeof(JDIMENSION) != 4)
618     return 0;
619   if (sizeof(ISLOW_MULT_TYPE) != 2)
620     return 0;
621 
622   if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
623     return 0;
624 
625   return 1;
626 }
627 
628 GLOBAL(int)
jsimd_can_idct_4x4(void)629 jsimd_can_idct_4x4 (void)
630 {
631   /* The code is optimised for these values only */
632   if (DCTSIZE != 8)
633     return 0;
634   if (sizeof(JCOEF) != 2)
635     return 0;
636   if (BITS_IN_JSAMPLE != 8)
637     return 0;
638   if (sizeof(JDIMENSION) != 4)
639     return 0;
640   if (sizeof(ISLOW_MULT_TYPE) != 2)
641     return 0;
642 
643   if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
644     return 0;
645 
646   return 1;
647 }
648 
649 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)650 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
651                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
652                 JDIMENSION output_col)
653 {
654   jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
655 }
656 
657 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)658 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
659                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
660                 JDIMENSION output_col)
661 {
662   jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
663 }
664 
665 GLOBAL(int)
jsimd_can_idct_islow(void)666 jsimd_can_idct_islow (void)
667 {
668   /* The code is optimised for these values only */
669   if (DCTSIZE != 8)
670     return 0;
671   if (sizeof(JCOEF) != 2)
672     return 0;
673   if (BITS_IN_JSAMPLE != 8)
674     return 0;
675   if (sizeof(JDIMENSION) != 4)
676     return 0;
677   if (sizeof(ISLOW_MULT_TYPE) != 2)
678     return 0;
679 
680   if (!IS_ALIGNED_SSE(jconst_idct_islow_sse2))
681     return 0;
682 
683   return 1;
684 }
685 
686 GLOBAL(int)
jsimd_can_idct_ifast(void)687 jsimd_can_idct_ifast (void)
688 {
689   /* The code is optimised for these values only */
690   if (DCTSIZE != 8)
691     return 0;
692   if (sizeof(JCOEF) != 2)
693     return 0;
694   if (BITS_IN_JSAMPLE != 8)
695     return 0;
696   if (sizeof(JDIMENSION) != 4)
697     return 0;
698   if (sizeof(IFAST_MULT_TYPE) != 2)
699     return 0;
700   if (IFAST_SCALE_BITS != 2)
701     return 0;
702 
703   if (!IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
704     return 0;
705 
706   return 1;
707 }
708 
709 GLOBAL(int)
jsimd_can_idct_float(void)710 jsimd_can_idct_float (void)
711 {
712   if (DCTSIZE != 8)
713     return 0;
714   if (sizeof(JCOEF) != 2)
715     return 0;
716   if (BITS_IN_JSAMPLE != 8)
717     return 0;
718   if (sizeof(JDIMENSION) != 4)
719     return 0;
720   if (sizeof(FAST_FLOAT) != 4)
721     return 0;
722   if (sizeof(FLOAT_MULT_TYPE) != 4)
723     return 0;
724 
725   if (!IS_ALIGNED_SSE(jconst_idct_float_sse2))
726     return 0;
727 
728   return 1;
729 }
730 
731 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)732 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
733                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
734                   JDIMENSION output_col)
735 {
736   jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
737                         output_col);
738 }
739 
740 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)741 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
742                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
743                   JDIMENSION output_col)
744 {
745   jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
746                         output_col);
747 }
748 
749 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)750 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
751                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
752                   JDIMENSION output_col)
753 {
754   jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
755                         output_col);
756 }
757 
758