1;
2; jsimdcpu.asm - SIMD instruction support check
3;
4; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5; Copyright (C) 2016, D. R. Commander.
6;
7; Based on
8; x86 SIMD extension for IJG JPEG library
9; Copyright (C) 1999-2006, MIYASAKA Masaru.
10; For conditions of distribution and use, see copyright notice in jsimdext.inc
11;
; This file should be assembled with NASM (Netwide Assembler); it can *not*
; be assembled with Microsoft's MASM or any compatible assembler (including
; Borland's Turbo Assembler).
15; NASM is available from http://nasm.sourceforge.net/ or
16; http://sourceforge.net/project/showfiles.php?group_id=6208
17;
18; [TAB8]
19
20%include "jsimdext.inc"
21
; --------------------------------------------------------------------------
    SECTION     SEG_TEXT
    BITS        64
;
; Check if the CPU supports SIMD instructions
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support(void)
;
; Out:   rax = bitwise OR of JSIMD_* flags (constants defined outside this
;              file).  JSIMD_SSE and JSIMD_SSE2 are always set.  JSIMD_AVX2
;              is set only if every check below passes:
;                1. CPUID leaf 07H exists (maximum basic leaf >= 7)
;                2. CPUID.(EAX=7,ECX=0):EBX bit 5 (AVX2) is set
;                3. CPUID.1:ECX bit 27 (OSXSAVE) and bit 28 (AVX) are set
;                4. XCR0 bits 1 and 2 (XMM and YMM state) are both set
; Saved: rbx (overwritten by cpuid) and rdi (result accumulator) are pushed
;        on entry and popped on exit.
; Clobb: rcx, rdx (written by cpuid / xgetbv), flags
;

    align       32
    GLOBAL_FUNCTION(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
    push        rbx
    push        rdi

    xor         rdi, rdi                ; simd support flag

    ; SSE and SSE2 are reported unconditionally.  Set them before any CPUID
    ; query so that every early exit below still returns them.
    or          rdi, JSIMD_SSE2
    or          rdi, JSIMD_SSE

    ; Check whether CPUID leaf 07H is supported.  Querying a leaf above the
    ; maximum basic leaf does not return data for that leaf, so bit 5 of EBX
    ; would be meaningless and could be misread as AVX2 support.
    xor         rax, rax                ; leaf 0: eax = maximum basic leaf
    cpuid
    cmp         rax, 7
    jb          short .return           ; Maximum leaf < 07H

    ; Check for AVX2 instruction support
    mov         rax, 7
    xor         rcx, rcx                ; sub-leaf 0
    cpuid
    mov         rax, rbx                ; rax = Extended feature flags

    test        rax, 1<<5               ; bit5:AVX2
    jz          short .return

    ; Check for AVX2 O/S support
    mov         rax, 1
    xor         rcx, rcx
    cpuid
    test        rcx, 1<<27              ; bit27:OSXSAVE
    jz          short .return           ; O/S does not support XSAVE
    test        rcx, 1<<28              ; bit28:AVX
    jz          short .return           ; CPU does not support AVX

    xor         rcx, rcx                ; select XCR0
    xgetbv
    and         rax, 6                  ; keep bit1 (XMM) and bit2 (YMM)
    cmp         rax, 6                  ; O/S does not manage XMM/YMM state
                                        ; using XSAVE
    jnz         short .return

    or          rdi, JSIMD_AVX2

.return:
    mov         rax, rdi                ; return the accumulated flags

    pop         rdi
    pop         rbx
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32
80