aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/asmlib/popcount64.asm
blob: c4ad64e03bd41ce34832c43c0655720ed0f53a22 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
%include "defs.asm"

;*************************  popcount64.asm  ************************************
; Author:           Agner Fog
; Date created:     2011-07-20
; Last modified:    2011-07-20

; Description:
; Population count function. Counts the number of 1-bits in a 32-bit integer
; unsigned int A_popcount (unsigned int x);
;
; Position-independent code is generated if POSITIONINDEPENDENT is defined.
;
; CPU dispatching included for 386 and SSE4.2 instruction sets.
;
; Copyright (c) 2011 GNU General Public License www.gnu.org/licenses
;******************************************************************************
default rel

global A_popcount: function

; Direct entries to CPU-specific versions
global popcountGeneric: function
global popcountSSE42: function

; Imported from instrset32.asm:
extern InstructionSet                 ; Instruction set for CPU dispatcher

section .text

;******************************************************************************
;                               popcount function
;******************************************************************************


; unsigned int A_popcount (unsigned int x);
; Public entry point. Transfers control through the popcountDispatch pointer
; without touching any register or the stack, so the selected implementation
; sees the caller's argument and return address unchanged.
A_popcount:
        jmp     near [rel popcountDispatch]   ; tail-jump to whatever the pointer currently holds

align 16
; popcountSSE42 - bit count using the POPCNT instruction.
; In:    ecx when WINDOWS is defined, otherwise edi = 32-bit value
; Out:   eax = number of 1-bits in the input
; Note:  executes POPCNT unconditionally; the caller is responsible for
;        making sure the CPU supports it.
popcountSSE42:
%ifndef WINDOWS
        popcnt  eax, edi                ; first integer argument is in edi
%else
        popcnt  eax, ecx                ; first integer argument is in ecx
%endif
        ret


;******************************************************************************
;                               popcount function generic
;******************************************************************************

; popcountGeneric - bit count without the POPCNT instruction.
; In:    ecx when WINDOWS is defined, otherwise edi = 32-bit value x
; Out:   eax = number of 1-bits in x (0..32)
; Clobb: edx, flags
; Method: add neighbouring bit fields in parallel, doubling the field width
;         at every step (1 -> 2 -> 4 -> 8 bits), then sum the four bytes.
popcountGeneric:
%ifndef WINDOWS
        mov     edx, edi
%else
        mov     edx, ecx
%endif
        mov     eax, edx                ; eax = edx = x

        ; Step 1: every 2-bit field = sum of its two bits
        and     edx, 0x55555555         ; edx = even-numbered bits of x
        shr     eax, 1
        and     eax, 0x55555555         ; eax = odd-numbered bits of x, moved down one place
        add     eax, edx

        ; Step 2: every 4-bit field = sum of its two 2-bit fields
        mov     edx, eax
        and     edx, 0x33333333         ; lower 2-bit field of each nibble
        shr     eax, 2
        and     eax, 0x33333333         ; upper 2-bit field of each nibble
        add     eax, edx

        ; Step 3: every byte = sum of its two nibbles.
        ; A nibble sum is at most 4, so the add cannot carry out of a nibble
        ; and a single mask after the add is enough.
        mov     edx, eax
        shr     eax, 4
        add     eax, edx
        and     eax, 0x0F0F0F0F

        ; Step 4: accumulate all four byte counts into the low byte.
        ; The upper bytes hold partial sums that are discarded by the final mask.
        mov     edx, eax
        shr     eax, 8
        add     eax, edx
        mov     edx, eax
        shr     eax, 16
        add     eax, edx
        and     eax, 0x3F               ; result is at most 32, so 6 bits are sufficient
        ret
; end of popcountGeneric

; ********************************************************************************
; CPU dispatching for popcount. This is executed only once
; ********************************************************************************

%ifdef  WINDOWS
%define par1      rcx                  ; parameter 1: the integer whose bits are counted
%else
%define par1      rdi                  ; parameter 1: the integer whose bits are counted
%endif

; popcountCPUDispatch - first-call resolver for A_popcount.
; Reached through the popcountDispatch pointer with the caller's argument still
; in par1 and the caller's return address on top of the stack.
; Calls InstructionSet, stores the address of the matching implementation in
; popcountDispatch, then jumps to that implementation so the current call is
; completed by it.
; Clobb: rax, rdx, flags, plus whatever InstructionSet clobbers (par1 is
;        saved and restored around the call).
popcountCPUDispatch:
        ; Keep the argument safe across the call. With an ABI-conforming caller
        ; rsp is 8 mod 16 on entry, so this push also leaves rsp 16-byte
        ; aligned at the call below.
        push    par1
%ifdef  WINDOWS
        sub     rsp, 32               ; shadow space the Microsoft x64 ABI requires for any call
%endif
        call    InstructionSet        ; eax = supported instruction set level
%ifdef  WINDOWS
        add     rsp, 32               ; release shadow space (flags are set again by cmp below)
%endif
        pop     par1
        ; Default to the generic version of popcount
        lea     rdx, [rel popcountGeneric]
        cmp     eax, 9                ; level 9 or above: popcnt is treated as supported
        jb      .store
        ; Use the version based on the popcnt instruction
        lea     rdx, [rel popcountSSE42]
.store: mov     [rel popcountDispatch], rdx   ; later calls bypass this dispatcher
        ; Finish the current call in the selected version
        jmp     rdx

SECTION .data

; Pointer to the implementation used by A_popcount. It initially points to the
; dispatcher, which overwrites it with the address of the selected version.
; Kept 8-byte aligned so the 64-bit pointer never straddles a cache line when
; it is rewritten while other callers may be reading it.
align 8, db 0
popcountDispatch  DQ popcountCPUDispatch