1;
2; jsimdcpu.asm - SIMD instruction support check
3;
4; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5; Copyright (C) 2016, D. R. Commander.
6;
7; Based on
8; x86 SIMD extension for IJG JPEG library
9; Copyright (C) 1999-2006, MIYASAKA Masaru.
10; For conditions of distribution and use, see copyright notice in jsimdext.inc
11;
12; This file should be assembled with NASM (Netwide Assembler),
13; can *not* be assembled with Microsoft's MASM or any compatible
14; assembler (including Borland's Turbo Assembler).
15; NASM is available from http://nasm.sourceforge.net/ or
16; http://sourceforge.net/project/showfiles.php?group_id=6208
17
18%include "jsimdext.inc"
19
20; --------------------------------------------------------------------------
21    SECTION     SEG_TEXT
22    BITS        64
;
; Check if the CPU supports SIMD instructions
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support(void)
;
; Out:   eax = bit mask of JSIMD_* flags.
;            JSIMD_SSE and JSIMD_SSE2 are always set.
;            JSIMD_AVX2 is set only when all of the following hold:
;              - CPUID leaf 07H is available and reports AVX2 (EBX bit 5)
;              - CPUID leaf 01H reports OSXSAVE (ECX bit 27) and
;                AVX (ECX bit 28)
;              - XCR0 bits 1 and 2 are both set (XMM and YMM state are
;                managed by the O/S through XSAVE)
; Saved: rbx (overwritten by CPUID), rdi (holds the result mask; it is a
;        callee-saved register under the Microsoft x64 convention)
; Clobb: rax, rcx, rdx, flags
; Stack: two pushes, no calls (leaf function)
;

    align       32
    GLOBAL_FUNCTION(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
    push        rbx
    push        rdi

    ; Assume that all x86-64 processors support SSE & SSE2 instructions.
    ; edi accumulates the SIMD support flags for the rest of the function.
    mov         edi, (JSIMD_SSE | JSIMD_SSE2)

    ; Check whether CPUID leaf 07H is supported
    ; (leaf 07H is used to check for AVX2 instruction support)
    xor         eax, eax                ; leaf 00H: eax = maximum basic leaf
    cpuid
    cmp         eax, 7
    jb          short .return           ; Maximum leaf < 07H (unsigned)

    ; Check for AVX2 instruction support
    mov         eax, 7
    xor         ecx, ecx                ; sub-leaf 0
    cpuid                               ; ebx = Extended feature flags
    test        ebx, 1<<5               ; bit5:AVX2
    jz          short .return           ; CPU does not support AVX2

    ; Check for AVX2 O/S support
    mov         eax, 1
    cpuid                               ; ecx = Feature flags
    test        ecx, 1<<27              ; bit27:OSXSAVE
    jz          short .return           ; O/S does not support XSAVE
    test        ecx, 1<<28              ; bit28:AVX
    jz          short .return           ; CPU does not support AVX

    xor         ecx, ecx                ; select XCR0
    xgetbv                              ; edx:eax = XCR0
    and         eax, 6                  ; keep bit1 (XMM) and bit2 (YMM) state
    cmp         eax, 6                  ; both must be enabled
    jne         short .return           ; O/S does not manage XMM/YMM state
                                        ; using XSAVE

    or          edi, JSIMD_AVX2

.return:
    mov         eax, edi                ; return the accumulated flag mask

    pop         rdi
    pop         rbx
    ret
83
84; For some reason, the OS X linker does not honor the request to align the
85; segment unless we do this.
86    align       32
87