1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
|
%include "defs.asm"
;************************* instrset64.asm **********************************
; Author: Agner Fog
; Date created: 2003-12-12
; Last modified: 2013-09-11
; Source URL: www.agner.org/optimize
; Project: asmlib.zip
; Language: assembly, NASM/YASM syntax, 64 bit
;
; C++ prototype:
; extern "C" int InstructionSet (void);
;
; Description:
; This function returns an integer indicating which instruction set is
; supported by the microprocessor and operating system. A program can
; call this function to determine if a particular set of instructions can
; be used.
;
; The method used here for detecting whether XMM instructions are enabled by
; the operating system is different from the method recommended by Intel.
; The method used here has the advantage that it is independent of the
; ability of the operating system to catch invalid opcode exceptions. The
; method used here has been thoroughly tested on many different versions of
; Intel and AMD microprocessors, and is believed to work reliably. For further
; discussion of this method, see my manual "Optimizing subroutines in assembly
; language" (www.agner.org/optimize/).
;
; Copyright (c) 2003-2013 GNU General Public License www.gnu.org/licenses
;******************************************************************************
;
; ********** InstructionSet function **********
; C++ prototype:
; extern "C" int InstructionSet (void);
;
; return value:
; 0 = 80386 instruction set only
; 1 or above = MMX instructions supported
; 2 or above = conditional move and FCOMI supported
; 3 or above = SSE (XMM) supported by processor and operating system
; 4 or above = SSE2 supported
; 5 or above = SSE3 supported
; 6 or above = Supplementary SSE3
; 8 or above = SSE4.1 supported
; 9 or above = POPCNT supported
; 10 or above = SSE4.2 supported
; 11 or above = AVX supported by processor and operating system
; 12 or above = PCLMUL and AES supported
; 13 or above = AVX2 supported
; 14 or above = FMA3, F16C, BMI1, BMI2, LZCNT
; 15 or above = HLE + RTM supported (reserved: not detected by this code, which returns at most 14)
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Use RIP-relative addressing for memory operands by default.
default rel
; Exported symbols: the detection routine and its cached result.
global InstructionSet: function
global IInstrSet
SECTION .data
align 16
; Cached return value of InstructionSet. Both labels below name the same
; dword; the code in this file reads and writes it through IInstrSet@.
IInstrSet@: ; local name to avoid problems in shared objects
IInstrSet: dd -1 ; -1 = not determined yet (InstructionSet treats a negative value as "first call"); valid after first call
SECTION .text align=16

;-----------------------------------------------------------------------
; extern "C" int InstructionSet (void);
;
; Returns the highest instruction-set level available. The result is
; stored in IInstrSet, so the CPUID queries run only while the stored
; value is still negative (i.e. on the first call).
;
; In:    nothing
; Out:   eax = level
;           4 = SSE2 (always present in 64-bit mode)
;           5 = SSE3
;           6 = Supplementary SSE3
;           8 = SSE4.1
;           9 = POPCNT
;          10 = SSE4.2
;          11 = AVX, supported by processor and operating system
;          12 = PCLMUL and AES
;          13 = AVX2
;          14 = FMA3, F16C, BMI1, BMI2, LZCNT
;        The checks form a chain: detection stops at the first missing
;        feature, so a level implies all lower levels. 7 is never
;        returned and 14 is the highest value this code produces.
; Clobb: rcx, rdx and flags while detecting; only flags on the cached
;        path. rbx is saved and restored.
; Stack: leaf function; at most four 8-byte pushes, no calls.
;-----------------------------------------------------------------------
InstructionSet:
        mov     eax, [IInstrSet@]       ; cached level, negative before first call
        test    eax, eax
        js      .detect                 ; negative: level not determined yet
        ret                             ; cached level is already in eax

.detect:
        push    rbx                     ; cpuid overwrites ebx

        mov     eax, 1
        cpuid                           ; leaf 1: feature flags in ecx and edx

        mov     eax, 4                  ; at least SSE2 in 64-bit mode
        test    ecx, 1                  ; SSE3 support by microprocessor
        jz      .done
        inc     eax                     ; 5

        bt      ecx, 9                  ; Suppl-SSE3 support by microprocessor
        jnc     .done
        inc     eax                     ; 6

        bt      ecx, 19                 ; SSE4.1 support by microprocessor
        jnc     .done
        mov     eax, 8                  ; 8 (7 is skipped); full-width write,
                                        ; no partial-register update

        bt      ecx, 23                 ; POPCNT support by microprocessor
        jnc     .done
        inc     eax                     ; 9

        bt      ecx, 20                 ; SSE4.2 support by microprocessor
        jnc     .done
        inc     eax                     ; 10

        ; Check OS support for YMM registers (AVX).
        bt      ecx, 27                 ; OSXSAVE: XGETBV usable
        jnc     .done
        push    rax                     ; XGETBV returns its result in edx:eax
        push    rcx
        push    rdx
        xor     ecx, ecx                ; select XCR0
        db      0FH, 01H, 0D0H          ; XGETBV, hand-encoded as in the original
        and     eax, 6                  ; XCR0 bits 1 and 2: XMM and YMM state
        cmp     eax, 6                  ; both must be enabled by the OS
        pop     rdx                     ; pop leaves the flags from cmp intact
        pop     rcx
        pop     rax
        jne     .done

        bt      ecx, 28                 ; AVX support by microprocessor
        jnc     .done
        inc     eax                     ; 11

        bt      ecx, 1                  ; PCLMUL support
        jnc     .done
        bt      ecx, 25                 ; AES support
        jnc     .done
        inc     eax                     ; 12

        ; Query leaf 7 only if the processor reports it. An out-of-range
        ; basic leaf can return the data of the highest supported leaf,
        ; which would make the bit tests below meaningless.
        push    rax
        push    rcx
        xor     eax, eax
        cpuid                           ; leaf 0: eax = highest basic leaf
        xor     ebx, ebx                ; default: no leaf-7 features
        cmp     eax, 7
        jb      .leaf7_done
        mov     eax, 7
        xor     ecx, ecx                ; sub-leaf 0
        cpuid                           ; ebx = structured extended feature flags
.leaf7_done:
        pop     rcx                     ; leaf-1 feature flags again
        pop     rax                     ; level reached so far (12)
        bt      ebx, 5                  ; AVX2
        jnc     .done
        inc     eax                     ; 13

        ; 14 or above = FMA3, F16C, BMI1, BMI2, LZCNT
        bt      ecx, 12                 ; FMA3
        jnc     .done
        bt      ecx, 29                 ; F16C
        jnc     .done
        bt      ebx, 3                  ; BMI1 (ebx still holds leaf 7)
        jnc     .done
        bt      ebx, 8                  ; BMI2
        jnc     .done

        ; Extended leaf 80000001H also carries the long-mode flag, so it
        ; exists on every processor able to run this 64-bit code.
        push    rax
        push    rbx
        push    rcx
        mov     eax, 80000001H
        cpuid
        bt      ecx, 5                  ; LZCNT
        pop     rcx                     ; pops keep the carry flag from bt
        pop     rbx
        pop     rax
        jnc     .done
        inc     eax                     ; 14

.done:
        mov     [IInstrSet@], eax       ; save value in global variable
        pop     rbx
        ret                             ; return value is in eax
;InstructionSet ENDP
|