1 /*
2 * jsimd_i386.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander.
6 * Copyright (C) 2015, Matthieu Darbois.
7 *
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11 *
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * 32-bit x86 architecture.
15 */
16
17 #define JPEG_INTERNALS
18 #include "../jinclude.h"
19 #include "../jpeglib.h"
20 #include "../jsimd.h"
21 #include "../jdct.h"
22 #include "../jsimddct.h"
23 #include "jsimd.h"
24
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the address `ptr` is a multiple of
 * (1 << order) bytes.  Both arguments are parenthesized so that an expression
 * argument such as "base + n" is converted as a whole, and the address goes
 * through size_t (same width as unsigned on 32-bit x86) so that the cast does
 * not narrow the pointer when this macro is compiled for a wider target.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */

/* Bit mask of usable SIMD extensions.  All-ones is the "not probed yet"
 * sentinel that init_simd() tests for before probing. */
static unsigned int simd_support = ~0U;
/* Non-zero while SIMD Huffman encoding is allowed; init_simd() clears it when
 * JSIMD_NOHUFFENC is set to "1". */
static unsigned int simd_huffman = 1;
35
36 /*
37 * Check what SIMD accelerations are supported.
38 *
39 * FIXME: This code is racy under a multi-threaded environment.
40 */
41 LOCAL(void)
init_simd(void)42 init_simd (void)
43 {
44 char *env = NULL;
45
46 if (simd_support != ~0U)
47 return;
48
49 simd_support = jpeg_simd_cpu_support();
50
51 /* Force different settings through environment variables */
52 env = getenv("JSIMD_FORCEMMX");
53 if ((env != NULL) && (strcmp(env, "1") == 0))
54 simd_support &= JSIMD_MMX;
55 env = getenv("JSIMD_FORCE3DNOW");
56 if ((env != NULL) && (strcmp(env, "1") == 0))
57 simd_support &= JSIMD_3DNOW|JSIMD_MMX;
58 env = getenv("JSIMD_FORCESSE");
59 if ((env != NULL) && (strcmp(env, "1") == 0))
60 simd_support &= JSIMD_SSE|JSIMD_MMX;
61 env = getenv("JSIMD_FORCESSE2");
62 if ((env != NULL) && (strcmp(env, "1") == 0))
63 simd_support &= JSIMD_SSE2;
64 env = getenv("JSIMD_FORCENONE");
65 if ((env != NULL) && (strcmp(env, "1") == 0))
66 simd_support = 0;
67 env = getenv("JSIMD_NOHUFFENC");
68 if ((env != NULL) && (strcmp(env, "1") == 0))
69 simd_huffman = 0;
70 }
71
72 GLOBAL(int)
jsimd_can_rgb_ycc(void)73 jsimd_can_rgb_ycc (void)
74 {
75 init_simd();
76
77 /* The code is optimised for these values only */
78 if (BITS_IN_JSAMPLE != 8)
79 return 0;
80 if (sizeof(JDIMENSION) != 4)
81 return 0;
82 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
83 return 0;
84
85 if ((simd_support & JSIMD_SSE2) &&
86 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
87 return 1;
88 if (simd_support & JSIMD_MMX)
89 return 1;
90
91 return 0;
92 }
93
94 GLOBAL(int)
jsimd_can_rgb_gray(void)95 jsimd_can_rgb_gray (void)
96 {
97 init_simd();
98
99 /* The code is optimised for these values only */
100 if (BITS_IN_JSAMPLE != 8)
101 return 0;
102 if (sizeof(JDIMENSION) != 4)
103 return 0;
104 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
105 return 0;
106
107 if ((simd_support & JSIMD_SSE2) &&
108 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
109 return 1;
110 if (simd_support & JSIMD_MMX)
111 return 1;
112
113 return 0;
114 }
115
116 GLOBAL(int)
jsimd_can_ycc_rgb(void)117 jsimd_can_ycc_rgb (void)
118 {
119 init_simd();
120
121 /* The code is optimised for these values only */
122 if (BITS_IN_JSAMPLE != 8)
123 return 0;
124 if (sizeof(JDIMENSION) != 4)
125 return 0;
126 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
127 return 0;
128
129 if ((simd_support & JSIMD_SSE2) &&
130 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
131 return 1;
132 if (simd_support & JSIMD_MMX)
133 return 1;
134
135 return 0;
136 }
137
138 GLOBAL(int)
jsimd_can_ycc_rgb565(void)139 jsimd_can_ycc_rgb565 (void)
140 {
141 return 0;
142 }
143
144 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)145 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
146 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
147 JDIMENSION output_row, int num_rows)
148 {
149 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
150 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
151
152 switch(cinfo->in_color_space) {
153 case JCS_EXT_RGB:
154 sse2fct=jsimd_extrgb_ycc_convert_sse2;
155 mmxfct=jsimd_extrgb_ycc_convert_mmx;
156 break;
157 case JCS_EXT_RGBX:
158 case JCS_EXT_RGBA:
159 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
160 mmxfct=jsimd_extrgbx_ycc_convert_mmx;
161 break;
162 case JCS_EXT_BGR:
163 sse2fct=jsimd_extbgr_ycc_convert_sse2;
164 mmxfct=jsimd_extbgr_ycc_convert_mmx;
165 break;
166 case JCS_EXT_BGRX:
167 case JCS_EXT_BGRA:
168 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
169 mmxfct=jsimd_extbgrx_ycc_convert_mmx;
170 break;
171 case JCS_EXT_XBGR:
172 case JCS_EXT_ABGR:
173 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
174 mmxfct=jsimd_extxbgr_ycc_convert_mmx;
175 break;
176 case JCS_EXT_XRGB:
177 case JCS_EXT_ARGB:
178 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
179 mmxfct=jsimd_extxrgb_ycc_convert_mmx;
180 break;
181 default:
182 sse2fct=jsimd_rgb_ycc_convert_sse2;
183 mmxfct=jsimd_rgb_ycc_convert_mmx;
184 break;
185 }
186
187 if ((simd_support & JSIMD_SSE2) &&
188 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
189 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
190 else if (simd_support & JSIMD_MMX)
191 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
192 }
193
194 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)195 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
196 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
197 JDIMENSION output_row, int num_rows)
198 {
199 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
200 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
201
202 switch(cinfo->in_color_space) {
203 case JCS_EXT_RGB:
204 sse2fct=jsimd_extrgb_gray_convert_sse2;
205 mmxfct=jsimd_extrgb_gray_convert_mmx;
206 break;
207 case JCS_EXT_RGBX:
208 case JCS_EXT_RGBA:
209 sse2fct=jsimd_extrgbx_gray_convert_sse2;
210 mmxfct=jsimd_extrgbx_gray_convert_mmx;
211 break;
212 case JCS_EXT_BGR:
213 sse2fct=jsimd_extbgr_gray_convert_sse2;
214 mmxfct=jsimd_extbgr_gray_convert_mmx;
215 break;
216 case JCS_EXT_BGRX:
217 case JCS_EXT_BGRA:
218 sse2fct=jsimd_extbgrx_gray_convert_sse2;
219 mmxfct=jsimd_extbgrx_gray_convert_mmx;
220 break;
221 case JCS_EXT_XBGR:
222 case JCS_EXT_ABGR:
223 sse2fct=jsimd_extxbgr_gray_convert_sse2;
224 mmxfct=jsimd_extxbgr_gray_convert_mmx;
225 break;
226 case JCS_EXT_XRGB:
227 case JCS_EXT_ARGB:
228 sse2fct=jsimd_extxrgb_gray_convert_sse2;
229 mmxfct=jsimd_extxrgb_gray_convert_mmx;
230 break;
231 default:
232 sse2fct=jsimd_rgb_gray_convert_sse2;
233 mmxfct=jsimd_rgb_gray_convert_mmx;
234 break;
235 }
236
237 if ((simd_support & JSIMD_SSE2) &&
238 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
239 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
240 else if (simd_support & JSIMD_MMX)
241 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
242 }
243
244 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)245 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
246 JSAMPIMAGE input_buf, JDIMENSION input_row,
247 JSAMPARRAY output_buf, int num_rows)
248 {
249 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
250 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
251
252 switch(cinfo->out_color_space) {
253 case JCS_EXT_RGB:
254 sse2fct=jsimd_ycc_extrgb_convert_sse2;
255 mmxfct=jsimd_ycc_extrgb_convert_mmx;
256 break;
257 case JCS_EXT_RGBX:
258 case JCS_EXT_RGBA:
259 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
260 mmxfct=jsimd_ycc_extrgbx_convert_mmx;
261 break;
262 case JCS_EXT_BGR:
263 sse2fct=jsimd_ycc_extbgr_convert_sse2;
264 mmxfct=jsimd_ycc_extbgr_convert_mmx;
265 break;
266 case JCS_EXT_BGRX:
267 case JCS_EXT_BGRA:
268 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
269 mmxfct=jsimd_ycc_extbgrx_convert_mmx;
270 break;
271 case JCS_EXT_XBGR:
272 case JCS_EXT_ABGR:
273 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
274 mmxfct=jsimd_ycc_extxbgr_convert_mmx;
275 break;
276 case JCS_EXT_XRGB:
277 case JCS_EXT_ARGB:
278 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
279 mmxfct=jsimd_ycc_extxrgb_convert_mmx;
280 break;
281 default:
282 sse2fct=jsimd_ycc_rgb_convert_sse2;
283 mmxfct=jsimd_ycc_rgb_convert_mmx;
284 break;
285 }
286
287 if ((simd_support & JSIMD_SSE2) &&
288 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
289 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
290 else if (simd_support & JSIMD_MMX)
291 mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
292 }
293
294 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)295 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
296 JSAMPIMAGE input_buf, JDIMENSION input_row,
297 JSAMPARRAY output_buf, int num_rows)
298 {
299 }
300
301 GLOBAL(int)
jsimd_can_h2v2_downsample(void)302 jsimd_can_h2v2_downsample (void)
303 {
304 init_simd();
305
306 /* The code is optimised for these values only */
307 if (BITS_IN_JSAMPLE != 8)
308 return 0;
309 if (sizeof(JDIMENSION) != 4)
310 return 0;
311
312 if (simd_support & JSIMD_SSE2)
313 return 1;
314 if (simd_support & JSIMD_MMX)
315 return 1;
316
317 return 0;
318 }
319
320 GLOBAL(int)
jsimd_can_h2v1_downsample(void)321 jsimd_can_h2v1_downsample (void)
322 {
323 init_simd();
324
325 /* The code is optimised for these values only */
326 if (BITS_IN_JSAMPLE != 8)
327 return 0;
328 if (sizeof(JDIMENSION) != 4)
329 return 0;
330
331 if (simd_support & JSIMD_SSE2)
332 return 1;
333 if (simd_support & JSIMD_MMX)
334 return 1;
335
336 return 0;
337 }
338
339 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)340 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
341 JSAMPARRAY input_data, JSAMPARRAY output_data)
342 {
343 if (simd_support & JSIMD_SSE2)
344 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
345 compptr->v_samp_factor,
346 compptr->width_in_blocks, input_data,
347 output_data);
348 else if (simd_support & JSIMD_MMX)
349 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
350 compptr->v_samp_factor, compptr->width_in_blocks,
351 input_data, output_data);
352 }
353
354 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)355 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
356 JSAMPARRAY input_data, JSAMPARRAY output_data)
357 {
358 if (simd_support & JSIMD_SSE2)
359 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
360 compptr->v_samp_factor,
361 compptr->width_in_blocks, input_data,
362 output_data);
363 else if (simd_support & JSIMD_MMX)
364 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
365 compptr->v_samp_factor, compptr->width_in_blocks,
366 input_data, output_data);
367 }
368
369 GLOBAL(int)
jsimd_can_h2v2_upsample(void)370 jsimd_can_h2v2_upsample (void)
371 {
372 init_simd();
373
374 /* The code is optimised for these values only */
375 if (BITS_IN_JSAMPLE != 8)
376 return 0;
377 if (sizeof(JDIMENSION) != 4)
378 return 0;
379
380 if (simd_support & JSIMD_SSE2)
381 return 1;
382 if (simd_support & JSIMD_MMX)
383 return 1;
384
385 return 0;
386 }
387
388 GLOBAL(int)
jsimd_can_h2v1_upsample(void)389 jsimd_can_h2v1_upsample (void)
390 {
391 init_simd();
392
393 /* The code is optimised for these values only */
394 if (BITS_IN_JSAMPLE != 8)
395 return 0;
396 if (sizeof(JDIMENSION) != 4)
397 return 0;
398
399 if (simd_support & JSIMD_SSE2)
400 return 1;
401 if (simd_support & JSIMD_MMX)
402 return 1;
403
404 return 0;
405 }
406
407 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)408 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
409 jpeg_component_info *compptr,
410 JSAMPARRAY input_data,
411 JSAMPARRAY *output_data_ptr)
412 {
413 if (simd_support & JSIMD_SSE2)
414 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
415 input_data, output_data_ptr);
416 else if (simd_support & JSIMD_MMX)
417 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
418 input_data, output_data_ptr);
419 }
420
421 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)422 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
423 jpeg_component_info *compptr,
424 JSAMPARRAY input_data,
425 JSAMPARRAY *output_data_ptr)
426 {
427 if (simd_support & JSIMD_SSE2)
428 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
429 input_data, output_data_ptr);
430 else if (simd_support & JSIMD_MMX)
431 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
432 input_data, output_data_ptr);
433 }
434
435 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)436 jsimd_can_h2v2_fancy_upsample (void)
437 {
438 init_simd();
439
440 /* The code is optimised for these values only */
441 if (BITS_IN_JSAMPLE != 8)
442 return 0;
443 if (sizeof(JDIMENSION) != 4)
444 return 0;
445
446 if ((simd_support & JSIMD_SSE2) &&
447 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
448 return 1;
449 if (simd_support & JSIMD_MMX)
450 return 1;
451
452 return 0;
453 }
454
455 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)456 jsimd_can_h2v1_fancy_upsample (void)
457 {
458 init_simd();
459
460 /* The code is optimised for these values only */
461 if (BITS_IN_JSAMPLE != 8)
462 return 0;
463 if (sizeof(JDIMENSION) != 4)
464 return 0;
465
466 if ((simd_support & JSIMD_SSE2) &&
467 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
468 return 1;
469 if (simd_support & JSIMD_MMX)
470 return 1;
471
472 return 0;
473 }
474
475 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)476 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
477 jpeg_component_info *compptr,
478 JSAMPARRAY input_data,
479 JSAMPARRAY *output_data_ptr)
480 {
481 if ((simd_support & JSIMD_SSE2) &&
482 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
483 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
484 compptr->downsampled_width, input_data,
485 output_data_ptr);
486 else if (simd_support & JSIMD_MMX)
487 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
488 compptr->downsampled_width, input_data,
489 output_data_ptr);
490 }
491
492 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)493 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
494 jpeg_component_info *compptr,
495 JSAMPARRAY input_data,
496 JSAMPARRAY *output_data_ptr)
497 {
498 if ((simd_support & JSIMD_SSE2) &&
499 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
500 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
501 compptr->downsampled_width, input_data,
502 output_data_ptr);
503 else if (simd_support & JSIMD_MMX)
504 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
505 compptr->downsampled_width, input_data,
506 output_data_ptr);
507 }
508
509 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)510 jsimd_can_h2v2_merged_upsample (void)
511 {
512 init_simd();
513
514 /* The code is optimised for these values only */
515 if (BITS_IN_JSAMPLE != 8)
516 return 0;
517 if (sizeof(JDIMENSION) != 4)
518 return 0;
519
520 if ((simd_support & JSIMD_SSE2) &&
521 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
522 return 1;
523 if (simd_support & JSIMD_MMX)
524 return 1;
525
526 return 0;
527 }
528
529 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)530 jsimd_can_h2v1_merged_upsample (void)
531 {
532 init_simd();
533
534 /* The code is optimised for these values only */
535 if (BITS_IN_JSAMPLE != 8)
536 return 0;
537 if (sizeof(JDIMENSION) != 4)
538 return 0;
539
540 if ((simd_support & JSIMD_SSE2) &&
541 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
542 return 1;
543 if (simd_support & JSIMD_MMX)
544 return 1;
545
546 return 0;
547 }
548
549 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)550 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
551 JSAMPIMAGE input_buf,
552 JDIMENSION in_row_group_ctr,
553 JSAMPARRAY output_buf)
554 {
555 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
556 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
557
558 switch(cinfo->out_color_space) {
559 case JCS_EXT_RGB:
560 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
561 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
562 break;
563 case JCS_EXT_RGBX:
564 case JCS_EXT_RGBA:
565 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
566 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
567 break;
568 case JCS_EXT_BGR:
569 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
570 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
571 break;
572 case JCS_EXT_BGRX:
573 case JCS_EXT_BGRA:
574 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
575 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
576 break;
577 case JCS_EXT_XBGR:
578 case JCS_EXT_ABGR:
579 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
580 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
581 break;
582 case JCS_EXT_XRGB:
583 case JCS_EXT_ARGB:
584 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
585 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
586 break;
587 default:
588 sse2fct=jsimd_h2v2_merged_upsample_sse2;
589 mmxfct=jsimd_h2v2_merged_upsample_mmx;
590 break;
591 }
592
593 if ((simd_support & JSIMD_SSE2) &&
594 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
595 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
596 else if (simd_support & JSIMD_MMX)
597 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
598 }
599
600 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)601 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
602 JSAMPIMAGE input_buf,
603 JDIMENSION in_row_group_ctr,
604 JSAMPARRAY output_buf)
605 {
606 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
607 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
608
609 switch(cinfo->out_color_space) {
610 case JCS_EXT_RGB:
611 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
612 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
613 break;
614 case JCS_EXT_RGBX:
615 case JCS_EXT_RGBA:
616 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
617 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
618 break;
619 case JCS_EXT_BGR:
620 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
621 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
622 break;
623 case JCS_EXT_BGRX:
624 case JCS_EXT_BGRA:
625 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
626 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
627 break;
628 case JCS_EXT_XBGR:
629 case JCS_EXT_ABGR:
630 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
631 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
632 break;
633 case JCS_EXT_XRGB:
634 case JCS_EXT_ARGB:
635 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
636 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
637 break;
638 default:
639 sse2fct=jsimd_h2v1_merged_upsample_sse2;
640 mmxfct=jsimd_h2v1_merged_upsample_mmx;
641 break;
642 }
643
644 if ((simd_support & JSIMD_SSE2) &&
645 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
646 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
647 else if (simd_support & JSIMD_MMX)
648 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
649 }
650
651 GLOBAL(int)
jsimd_can_convsamp(void)652 jsimd_can_convsamp (void)
653 {
654 init_simd();
655
656 /* The code is optimised for these values only */
657 if (DCTSIZE != 8)
658 return 0;
659 if (BITS_IN_JSAMPLE != 8)
660 return 0;
661 if (sizeof(JDIMENSION) != 4)
662 return 0;
663 if (sizeof(DCTELEM) != 2)
664 return 0;
665
666 if (simd_support & JSIMD_SSE2)
667 return 1;
668 if (simd_support & JSIMD_MMX)
669 return 1;
670
671 return 0;
672 }
673
674 GLOBAL(int)
jsimd_can_convsamp_float(void)675 jsimd_can_convsamp_float (void)
676 {
677 init_simd();
678
679 /* The code is optimised for these values only */
680 if (DCTSIZE != 8)
681 return 0;
682 if (BITS_IN_JSAMPLE != 8)
683 return 0;
684 if (sizeof(JDIMENSION) != 4)
685 return 0;
686 if (sizeof(FAST_FLOAT) != 4)
687 return 0;
688
689 if (simd_support & JSIMD_SSE2)
690 return 1;
691 if (simd_support & JSIMD_SSE)
692 return 1;
693 if (simd_support & JSIMD_3DNOW)
694 return 1;
695
696 return 0;
697 }
698
699 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)700 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
701 DCTELEM *workspace)
702 {
703 if (simd_support & JSIMD_SSE2)
704 jsimd_convsamp_sse2(sample_data, start_col, workspace);
705 else if (simd_support & JSIMD_MMX)
706 jsimd_convsamp_mmx(sample_data, start_col, workspace);
707 }
708
709 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)710 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
711 FAST_FLOAT *workspace)
712 {
713 if (simd_support & JSIMD_SSE2)
714 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
715 else if (simd_support & JSIMD_SSE)
716 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
717 else if (simd_support & JSIMD_3DNOW)
718 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
719 }
720
721 GLOBAL(int)
jsimd_can_fdct_islow(void)722 jsimd_can_fdct_islow (void)
723 {
724 init_simd();
725
726 /* The code is optimised for these values only */
727 if (DCTSIZE != 8)
728 return 0;
729 if (sizeof(DCTELEM) != 2)
730 return 0;
731
732 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
733 return 1;
734 if (simd_support & JSIMD_MMX)
735 return 1;
736
737 return 0;
738 }
739
740 GLOBAL(int)
jsimd_can_fdct_ifast(void)741 jsimd_can_fdct_ifast (void)
742 {
743 init_simd();
744
745 /* The code is optimised for these values only */
746 if (DCTSIZE != 8)
747 return 0;
748 if (sizeof(DCTELEM) != 2)
749 return 0;
750
751 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
752 return 1;
753 if (simd_support & JSIMD_MMX)
754 return 1;
755
756 return 0;
757 }
758
759 GLOBAL(int)
jsimd_can_fdct_float(void)760 jsimd_can_fdct_float (void)
761 {
762 init_simd();
763
764 /* The code is optimised for these values only */
765 if (DCTSIZE != 8)
766 return 0;
767 if (sizeof(FAST_FLOAT) != 4)
768 return 0;
769
770 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
771 return 1;
772 if (simd_support & JSIMD_3DNOW)
773 return 1;
774
775 return 0;
776 }
777
778 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)779 jsimd_fdct_islow (DCTELEM *data)
780 {
781 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
782 jsimd_fdct_islow_sse2(data);
783 else if (simd_support & JSIMD_MMX)
784 jsimd_fdct_islow_mmx(data);
785 }
786
787 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)788 jsimd_fdct_ifast (DCTELEM *data)
789 {
790 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
791 jsimd_fdct_ifast_sse2(data);
792 else if (simd_support & JSIMD_MMX)
793 jsimd_fdct_ifast_mmx(data);
794 }
795
796 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)797 jsimd_fdct_float (FAST_FLOAT *data)
798 {
799 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
800 jsimd_fdct_float_sse(data);
801 else if (simd_support & JSIMD_3DNOW)
802 jsimd_fdct_float_3dnow(data);
803 }
804
805 GLOBAL(int)
jsimd_can_quantize(void)806 jsimd_can_quantize (void)
807 {
808 init_simd();
809
810 /* The code is optimised for these values only */
811 if (DCTSIZE != 8)
812 return 0;
813 if (sizeof(JCOEF) != 2)
814 return 0;
815 if (sizeof(DCTELEM) != 2)
816 return 0;
817
818 if (simd_support & JSIMD_SSE2)
819 return 1;
820 if (simd_support & JSIMD_MMX)
821 return 1;
822
823 return 0;
824 }
825
826 GLOBAL(int)
jsimd_can_quantize_float(void)827 jsimd_can_quantize_float (void)
828 {
829 init_simd();
830
831 /* The code is optimised for these values only */
832 if (DCTSIZE != 8)
833 return 0;
834 if (sizeof(JCOEF) != 2)
835 return 0;
836 if (sizeof(FAST_FLOAT) != 4)
837 return 0;
838
839 if (simd_support & JSIMD_SSE2)
840 return 1;
841 if (simd_support & JSIMD_SSE)
842 return 1;
843 if (simd_support & JSIMD_3DNOW)
844 return 1;
845
846 return 0;
847 }
848
849 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)850 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
851 DCTELEM *workspace)
852 {
853 if (simd_support & JSIMD_SSE2)
854 jsimd_quantize_sse2(coef_block, divisors, workspace);
855 else if (simd_support & JSIMD_MMX)
856 jsimd_quantize_mmx(coef_block, divisors, workspace);
857 }
858
859 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)860 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
861 FAST_FLOAT *workspace)
862 {
863 if (simd_support & JSIMD_SSE2)
864 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
865 else if (simd_support & JSIMD_SSE)
866 jsimd_quantize_float_sse(coef_block, divisors, workspace);
867 else if (simd_support & JSIMD_3DNOW)
868 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
869 }
870
871 GLOBAL(int)
jsimd_can_idct_2x2(void)872 jsimd_can_idct_2x2 (void)
873 {
874 init_simd();
875
876 /* The code is optimised for these values only */
877 if (DCTSIZE != 8)
878 return 0;
879 if (sizeof(JCOEF) != 2)
880 return 0;
881 if (BITS_IN_JSAMPLE != 8)
882 return 0;
883 if (sizeof(JDIMENSION) != 4)
884 return 0;
885 if (sizeof(ISLOW_MULT_TYPE) != 2)
886 return 0;
887
888 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
889 return 1;
890 if (simd_support & JSIMD_MMX)
891 return 1;
892
893 return 0;
894 }
895
896 GLOBAL(int)
jsimd_can_idct_4x4(void)897 jsimd_can_idct_4x4 (void)
898 {
899 init_simd();
900
901 /* The code is optimised for these values only */
902 if (DCTSIZE != 8)
903 return 0;
904 if (sizeof(JCOEF) != 2)
905 return 0;
906 if (BITS_IN_JSAMPLE != 8)
907 return 0;
908 if (sizeof(JDIMENSION) != 4)
909 return 0;
910 if (sizeof(ISLOW_MULT_TYPE) != 2)
911 return 0;
912
913 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
914 return 1;
915 if (simd_support & JSIMD_MMX)
916 return 1;
917
918 return 0;
919 }
920
921 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)922 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
923 JCOEFPTR coef_block, JSAMPARRAY output_buf,
924 JDIMENSION output_col)
925 {
926 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
927 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
928 output_col);
929 else if (simd_support & JSIMD_MMX)
930 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
931 }
932
933 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)934 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
935 JCOEFPTR coef_block, JSAMPARRAY output_buf,
936 JDIMENSION output_col)
937 {
938 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
939 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
940 output_col);
941 else if (simd_support & JSIMD_MMX)
942 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
943 }
944
945 GLOBAL(int)
jsimd_can_idct_islow(void)946 jsimd_can_idct_islow (void)
947 {
948 init_simd();
949
950 /* The code is optimised for these values only */
951 if (DCTSIZE != 8)
952 return 0;
953 if (sizeof(JCOEF) != 2)
954 return 0;
955 if (BITS_IN_JSAMPLE != 8)
956 return 0;
957 if (sizeof(JDIMENSION) != 4)
958 return 0;
959 if (sizeof(ISLOW_MULT_TYPE) != 2)
960 return 0;
961
962 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
963 return 1;
964 if (simd_support & JSIMD_MMX)
965 return 1;
966
967 return 0;
968 }
969
970 GLOBAL(int)
jsimd_can_idct_ifast(void)971 jsimd_can_idct_ifast (void)
972 {
973 init_simd();
974
975 /* The code is optimised for these values only */
976 if (DCTSIZE != 8)
977 return 0;
978 if (sizeof(JCOEF) != 2)
979 return 0;
980 if (BITS_IN_JSAMPLE != 8)
981 return 0;
982 if (sizeof(JDIMENSION) != 4)
983 return 0;
984 if (sizeof(IFAST_MULT_TYPE) != 2)
985 return 0;
986 if (IFAST_SCALE_BITS != 2)
987 return 0;
988
989 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
990 return 1;
991 if (simd_support & JSIMD_MMX)
992 return 1;
993
994 return 0;
995 }
996
997 GLOBAL(int)
jsimd_can_idct_float(void)998 jsimd_can_idct_float (void)
999 {
1000 init_simd();
1001
1002 if (DCTSIZE != 8)
1003 return 0;
1004 if (sizeof(JCOEF) != 2)
1005 return 0;
1006 if (BITS_IN_JSAMPLE != 8)
1007 return 0;
1008 if (sizeof(JDIMENSION) != 4)
1009 return 0;
1010 if (sizeof(FAST_FLOAT) != 4)
1011 return 0;
1012 if (sizeof(FLOAT_MULT_TYPE) != 4)
1013 return 0;
1014
1015 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1016 return 1;
1017 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1018 return 1;
1019 if (simd_support & JSIMD_3DNOW)
1020 return 1;
1021
1022 return 0;
1023 }
1024
1025 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1026 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
1027 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1028 JDIMENSION output_col)
1029 {
1030 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1031 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1032 output_col);
1033 else if (simd_support & JSIMD_MMX)
1034 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
1035 output_col);
1036 }
1037
1038 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1039 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
1040 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1041 JDIMENSION output_col)
1042 {
1043 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1044 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1045 output_col);
1046 else if (simd_support & JSIMD_MMX)
1047 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
1048 output_col);
1049 }
1050
1051 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1052 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
1053 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1054 JDIMENSION output_col)
1055 {
1056 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1057 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1058 output_col);
1059 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1060 jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
1061 output_col);
1062 else if (simd_support & JSIMD_3DNOW)
1063 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
1064 output_col);
1065 }
1066
1067 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)1068 jsimd_can_huff_encode_one_block (void)
1069 {
1070 init_simd();
1071
1072 if (DCTSIZE != 8)
1073 return 0;
1074 if (sizeof(JCOEF) != 2)
1075 return 0;
1076
1077 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1078 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1079 return 1;
1080
1081 return 0;
1082 }
1083
1084 GLOBAL(JOCTET*)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)1085 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
1086 int last_dc_val, c_derived_tbl *dctbl,
1087 c_derived_tbl *actbl)
1088 {
1089 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1090 dctbl, actbl);
1091 }
1092