1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
%include "defs.asm"
;************************* popcount64.asm ************************************
; Author: Agner Fog
; Date created: 2011-07-20
; Last modified: 2011-07-20
; Description:
; Population count function. Counts the number of 1-bits in a 32-bit integer
; unsigned int A_popcount (unsigned int x);
;
; Memory operands use RIP-relative addressing (default rel); this 64-bit
; version does not test a POSITIONINDEPENDENT switch.
;
; CPU dispatching included for a generic version (no popcnt instruction) and
; a version using the popcnt instruction (SSE4.2 generation CPUs).
;
; Copyright (c) 2011 GNU General Public License www.gnu.org/licenses
;******************************************************************************
default rel
global A_popcount: function
; Direct entries to CPU-specific versions
global popcountGeneric: function
global popcountSSE42: function
; Imported from instrset64.asm:
extern InstructionSet ; Instruction set for CPU dispatcher
section .text
;******************************************************************************
; popcount function
;******************************************************************************
A_popcount:
; unsigned int A_popcount(unsigned int x)
; Public entry point. Performs an indirect tail-jump through the 64-bit code
; pointer stored in popcountDispatch. Nothing is pushed and no register is
; written here, so the jump target sees the argument and the return address
; exactly as the caller supplied them.
        jmp     qword [rel popcountDispatch]
; -----------------------------------------------------------------------------
; unsigned int popcountSSE42(unsigned int x)
; Hardware version: one POPCNT instruction.
; In:    ecx = x when WINDOWS is defined, otherwise edi = x
; Out:   eax = number of 1-bits in x
; Clobb: flags
; -----------------------------------------------------------------------------
align 16
popcountSSE42:
%ifdef WINDOWS
%define POPCNT_SRC ecx                  ; first integer argument, Windows x64
%else
%define POPCNT_SRC edi                  ; first integer argument otherwise
%endif
        popcnt  eax, POPCNT_SRC
        ret
%undef POPCNT_SRC
;******************************************************************************
; popcount function generic
;******************************************************************************
popcountGeneric:
; -----------------------------------------------------------------------------
; unsigned int popcountGeneric(unsigned int x)
; Software bit count using only basic integer instructions.
; In:    ecx = x when WINDOWS is defined, otherwise edi = x
; Out:   eax = number of 1-bits in x (0..32)
; Clobb: edx, flags
;
; Method: add neighbouring bit fields in parallel, doubling the field width
; each round (1 -> 2 -> 4 -> 8 bits), then fold the four byte sums together.
; -----------------------------------------------------------------------------
POPCNT_MASK_1BIT   equ 55555555h        ; every second bit
POPCNT_MASK_2BIT   equ 33333333h        ; low 2 bits of every 4-bit field
POPCNT_MASK_4BIT   equ 0F0F0F0Fh        ; low nibble of every byte
POPCNT_MASK_RESULT equ 3Fh              ; 6 bits are enough to hold 0..32

%ifdef WINDOWS
        mov     edx, ecx
%else
        mov     edx, edi
%endif
        ; Round 1: 2-bit fields, each = count of one bit pair (0..2)
        mov     eax, edx
        shr     eax, 1
        and     edx, POPCNT_MASK_1BIT   ; even-numbered bits
        and     eax, POPCNT_MASK_1BIT   ; odd-numbered bits, moved down
        add     eax, edx

        ; Round 2: 4-bit fields, each = count of one nibble (0..4)
        mov     edx, eax
        shr     edx, 2
        and     eax, POPCNT_MASK_2BIT
        and     edx, POPCNT_MASK_2BIT
        add     eax, edx

        ; Round 3: byte fields, each = count of one byte (0..8).
        ; A nibble sum is at most 8, so it cannot carry into the next field
        ; and a single mask after the add is sufficient.
        mov     edx, eax
        shr     edx, 4
        add     eax, edx
        and     eax, POPCNT_MASK_4BIT

        ; Fold the four byte counts into the lowest byte
        mov     edx, eax
        shr     edx, 8
        add     eax, edx
        mov     edx, eax
        shr     edx, 16
        add     eax, edx

        ; Upper bytes hold partial sums; keep only the total
        and     eax, POPCNT_MASK_RESULT
        ret
;popcountGeneric end
; ********************************************************************************
; CPU dispatching for popcount. This is executed only once
; ********************************************************************************
%ifdef WINDOWS
%define par1 rcx                        ; parameter 1: x, the integer whose bits are counted
%else
%define par1 rdi                        ; parameter 1: x, the integer whose bits are counted
%endif
; -----------------------------------------------------------------------------
; popcountCPUDispatch
; Initial target of the popcountDispatch pointer. Asks InstructionSet which
; instruction set the CPU supports, stores the address of the matching popcount
; version in popcountDispatch so later calls bypass this routine, and then
; tail-jumps to that version to serve the current call.
;
; In:    par1 = x (preserved across the InstructionSet call)
; Out:   as returned by the selected version
; Stack: reached by jmp, so rsp = 16n+8 on entry; the push of par1 makes rsp
;        16-byte aligned at the call below.
; -----------------------------------------------------------------------------
popcountCPUDispatch:
        ; get supported instruction set
        push    par1                    ; par1 is volatile: keep x safe across the call
%ifdef WINDOWS
        ; Windows x64 requires the caller to reserve 32 bytes of shadow space.
        ; Without it the callee's home area would overlap the saved par1 and
        ; the return address of our own caller.
        sub     rsp, 32
%endif
        call    InstructionSet
%ifdef WINDOWS
        add     rsp, 32                 ; release shadow space
%endif
        pop     par1
        ; Default to the generic version of popcount
        lea     rdx, [popcountGeneric]
        cmp     eax, 9                  ; check popcnt supported
        jb      .store
        ; popcnt supported: use the hardware version of popcount
        lea     rdx, [popcountSSE42]
.store:
        mov     [popcountDispatch], rdx ; later calls go straight to this version
        ; Continue in appropriate version
        jmp     rdx
section .data
; Code pointer used by the indirect jump in A_popcount. It starts out holding
; the address of popcountCPUDispatch, which replaces it with the address of
; the selected popcount version.
popcountDispatch:
        dq      popcountCPUDispatch
|