aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/asmlib/randomah.asi
blob: ed7a0185a43fd392e720547c9b3dff64f4189f3b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
; ----------------------------- RANDOMAH.ASI ---------------------------
;
;  Author:           Agner Fog
;  Date created:     1998
;  Last modified:    2013-09-09
;  Description:
;  Assembly include file containing
;  structure/class definitions for random number generators
;
; Copyright (c) 1998-2013 GNU General Public License www.gnu.org/licenses
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Definitions for Mersenne Twister:

TEMPERING   equ 1                   ; 1 = temper the output; 0 = no tempering (improves speed by 25%)

%if 0
; Parameter set for MT11213A (disabled by the "%if 0" above):
MERS_N      equ 351
MERS_M      equ 175
MERS_R      equ 19
MERS_A      equ 0xE4BD75F5
MERS_U      equ 11
MERS_S      equ 7
MERS_T      equ 15
MERS_L      equ 17
MERS_B      equ 0x655E5280
MERS_C      equ 0xFFD58000

%else
; Parameter set for MT19937 (the one that is assembled):
MERS_N      equ 624
MERS_M      equ 397
MERS_R      equ 31
MERS_A      equ 0x9908B0DF
MERS_U      equ 11
MERS_S      equ 7
MERS_T      equ 15
MERS_L      equ 18
MERS_B      equ 0x9D2C5680
MERS_C      equ 0xEFC60000

%endif

; Bit masks derived from MERS_R:
LOWER_MASK  equ (1 << MERS_R) - 1   ; the low MERS_R bits
UPPER_MASK  equ (-1) << MERS_R      ; the upper 32-MERS_R bits

; Define class CRandomMersenneA member data
; Must be aligned by 16.

struc CRandomMersenneA
.Fill1:         resd    4           ; filler for alignment
.PreInt:        resd    4           ; tempered integers made in advance, ready to use
.PreFlt:        resq    4           ; floating point numbers made in advance, ready to use (subtract 1.0)
                resq    1           ; room for the unaligned overrun of the last PreFlt if MERS_N mod 4 = 1
.TmpFlt:        resq    1           ; scratch space for a floating point random number
.PreInx:        resd    1           ; index of the next PreInt and PreFlt number
.Instset:       resd    1           ; instruction set
.LastInterval:  resd    1           ; interval length last used by IRandomX
.RLimit:        resd    1           ; rejection limit used by IRandomX
.TMB:           resd    4           ; MERS_B constant, 4 copies
.TMC:           resd    4           ; MERS_C constant, 4 copies
.one:           resq    2           ; 1.0 constant, 2 copies
.MTI:           resd    1           ; index into the MT buffer
.UMASK:         resd    1           ; constant: UPPER_MASK
.LMASK:         resd    1           ; constant: LOWER_MASK
.MATA:          resd    1           ; constant: MERS_A
.wrap1:         resd    4           ; MT buffer km wraparound
.MT:            resd    MERS_N      ; MT history buffer (aligned by 16)
.wrap2:         resd    4           ; MT buffer kk wraparound
%if MERS_N & 3
                ; MERS_N is not a multiple of 4: pad up to the next multiple of 4 dwords
                resd    (4 - (MERS_N & 3))
%endif
endstruc                            ; CRandomMersenneA


; Definitions for Mother-of-all generator:

; Define class CRandomMotherA member data
; Must be aligned by 16. Preferably aligned by 64 to fit a cache line
struc CRandomMotherA
.Fill2:         resd    4           ; filler for alignment
.one:           resq    1           ; constant 1.0
.Instset:       resd    1           ; instruction set
.M4:            resd    1           ; x[n-4]
.M3:            resd    1           ; x[n-3] (aligned)
.M2:            resd    1           ; x[n-2]
.M1:            resd    1           ; x[n-1]
.M0:            resd    1           ; x[n]
.MC:            resd    1           ; carry (aligned)
.zero:          resd    1           ; zero-extension of the carry
.RanP1:         resq    1           ; random double in the interval [1,2)
.MF3:           resd    1           ; factor 2111111111 (aligned)
.MF2:           resd    1           ; factor 1492
.MF1:           resd    1           ; factor 1776
.MF0:           resd    1           ; factor 5115
endstruc                            ; CRandomMotherA

; Multiplication factors of the Mother-of-all generator
MOTHERF3    equ 2111111111          ; factor 3
MOTHERF2    equ 1492                ; factor 2
MOTHERF1    equ 1776                ; factor 1
MOTHERF0    equ 5115                ; factor 0


; ***************************************************************************
; Definitions for SFMT generator
; ***************************************************************************

; Choose Mersenne exponent.
; Higher values give longer cycle length and use more memory.
; Exactly one of the following lines must be active:
; MEXP equ    607
; MEXP equ   1279
; MEXP equ   2281
; MEXP equ   4253
  MEXP equ  11213
; MEXP equ  19937
; MEXP equ  44497

; Parameter set for the selected exponent. Every branch defines the same names:
;   SFMT_N       Size of state vector
;   SFMT_M       Position of intermediate feedback
;   SFMT_SL1     Left shift of W[N-1], 32-bit words
;   SFMT_SL2     Left shift of W[0], *8, 128-bit words
;   SFMT_SR1     Right shift of W[M], 32-bit words
;   SFMT_SR2     Right shift of W[N-2], *8, 128-bit words
;   SFMT_MASK1   First DWORD of AND mask
;   SFMT_MASK    AND mask, as a comma-separated list of 4 DWORDs
;   SFMT_PARITY  Period certification vector, as a comma-separated list of 4 DWORDs

%if MEXP == 44497
SFMT_N      equ  348         ; Size of state vector
SFMT_M      equ  330         ; Position of intermediate feedback
SFMT_SL1    equ    5         ; Left shift of W[N-1], 32-bit words
SFMT_SL2    equ    3         ; Left shift of W[0], *8, 128-bit words
SFMT_SR1    equ    9         ; Right shift of W[M], 32-bit words
SFMT_SR2    equ    3         ; Right shift of W[N-2], *8, 128-bit words
SFMT_MASK1  equ  0effffffbH  ; first DWORD of AND mask
; AND mask:
%define SFMT_MASK   0effffffbH,0dfbebfffH,0bfbf7befH,09ffd7bffH
; Period certification vector:
%define SFMT_PARITY 1,0,0a3ac4000H,0ecc1327aH

%elif MEXP == 19937
SFMT_N      equ  156         ; Size of state vector
SFMT_M      equ  122         ; Position of intermediate feedback
SFMT_SL1    equ   18         ; Left shift of W[N-1], 32-bit words
SFMT_SL2    equ    1         ; Left shift of W[0], *8, 128-bit words
SFMT_SR1    equ   11         ; Right shift of W[M], 32-bit words
SFMT_SR2    equ    1         ; Right shift of W[N-2], *8, 128-bit words
SFMT_MASK1  equ  0dfffffefH  ; first DWORD of AND mask
%define SFMT_MASK   0dfffffefH,0ddfecb7fH,0bffaffffH,0bffffff6H
%define SFMT_PARITY 1,0,0,013c9e684H

%elif MEXP == 11213
SFMT_N      equ   88         ; Size of state vector
SFMT_M      equ   68         ; Position of intermediate feedback
SFMT_SL1    equ   14         ; Left shift of W[N-1], 32-bit words
SFMT_SL2    equ    3         ; Left shift of W[0], *8, 128-bit words
SFMT_SR1    equ    7         ; Right shift of W[M], 32-bit words
SFMT_SR2    equ    3         ; Right shift of W[N-2], *8, 128-bit words
SFMT_MASK1  equ  0effff7fbH  ; first DWORD of AND mask
%define SFMT_MASK   0effff7fbH,0ffffffefH,0dfdfbfffH,07fffdbfdH
%define SFMT_PARITY 1,0,0e8148000H,0d0c7afa3H

%elif MEXP == 4253
SFMT_N      equ   34         ; Size of state vector
SFMT_M      equ   17         ; Position of intermediate feedback
SFMT_SL1    equ   20         ; Left shift of W[N-1], 32-bit words
SFMT_SL2    equ    1         ; Left shift of W[0], *8, 128-bit words
SFMT_SR1    equ    7         ; Right shift of W[M], 32-bit words
SFMT_SR2    equ    1         ; Right shift of W[N-2], *8, 128-bit words
SFMT_MASK1  equ  09f7bffffH  ; first DWORD of AND mask
%define SFMT_MASK   09f7bffffH,09fffff5fH,03efffffbH,0fffff7bbH
%define SFMT_PARITY 0a8000001H,0af5390a3H,0b740b3f8H,06c11486dH

%elif MEXP == 2281
SFMT_N      equ   18         ; Size of state vector
SFMT_M      equ   12         ; Position of intermediate feedback
SFMT_SL1    equ   19         ; Left shift of W[N-1], 32-bit words
SFMT_SL2    equ    1         ; Left shift of W[0], *8, 128-bit words
SFMT_SR1    equ    5         ; Right shift of W[M], 32-bit words
SFMT_SR2    equ    1         ; Right shift of W[N-2], *8, 128-bit words
SFMT_MASK1  equ  0bff7ffbfH  ; first DWORD of AND mask
%define SFMT_MASK   0bff7ffbfH,0fdfffffeH,0f7ffef7fH,0f2f7cbbfH
%define SFMT_PARITY 1,0,0,041dfa600H

%elif MEXP == 1279
SFMT_N      equ   10         ; Size of state vector
SFMT_M      equ    7         ; Position of intermediate feedback
SFMT_SL1    equ   14         ; Left shift of W[N-1], 32-bit words
SFMT_SL2    equ    3         ; Left shift of W[0], *8, 128-bit words
SFMT_SR1    equ    5         ; Right shift of W[M], 32-bit words
SFMT_SR2    equ    1         ; Right shift of W[N-2], *8, 128-bit words
SFMT_MASK1  equ  0f7fefffdH  ; first DWORD of AND mask
%define SFMT_MASK   0f7fefffdH,07fefcfffH,0aff3ef3fH,0b5ffff7fH
%define SFMT_PARITY 1,0,0,020000000H

%elif MEXP == 607
SFMT_N      equ    5         ; Size of state vector
SFMT_M      equ    2         ; Position of intermediate feedback
SFMT_SL1    equ   15         ; Left shift of W[N-1], 32-bit words
SFMT_SL2    equ    3         ; Left shift of W[0], *8, 128-bit words
SFMT_SR1    equ   13         ; Right shift of W[M], 32-bit words
SFMT_SR2    equ    3         ; Right shift of W[N-2], *8, 128-bit words
SFMT_MASK1  equ  0fdff37ffH  ; first DWORD of AND mask
%define SFMT_MASK   0fdff37ffH,0ef7f3f7dH,0ff777b7dH,07ff7fb2fH
%define SFMT_PARITY 1,0,0,05986f054H

%else
%error MEXP must have one of the predefined values
%endif

struc CRandomSFMTA
.Fill3:         resd    1*4         ; filler for alignment

; --- Mother-Of-All generator parameters ---
.M3:            resd    1           ; x[n-3] (aligned)
                resd    1           ; unused; spacing required by the pmuludq instruction
.M2:            resd    1           ; x[n-2]
                resd    1           ; unused; spacing required by the pmuludq instruction
.M1:            resd    1           ; x[n-1]
                resd    1           ; unused; spacing required by the pmuludq instruction
.M0:            resd    1           ; x[n]
.MC:            resd    1           ; carry (zero-extends into one)
.one:           resq    1           ; 1.0 (low dword = zero-extension of carry) (aligned)
.TempRan:       resq    1           ; temporary random number
.MF3:           resd    1           ; 2111111111 (aligned)
.Instset:       resd    1           ; instruction set
.MF2:           resd    1           ; 1492 (MF3,MF2,MF1,MF0 are interleaved with other variables to fit the pmuludq instruction)
                resd    1           ; filler (usable for a read-only parameter, not for a read/write parameter)
.MF1:           resd    1           ; 1776
                resd    1           ; filler (usable for a read-only parameter, not for a read/write parameter)
.MF0:           resd    1           ; 5115
                resd    1           ; filler (usable for a read-only parameter, not for a read/write parameter)

; --- IRandomX parameters ---
.LASTINTERVAL:  resd    1           ; interval length last used by IRandomX
.RLIMIT:        resd    1           ; rejection limit used by IRandomX

; --- SFMT generator parameters ---
.USEMOTHER:     resd    1           ; 1 if combined with the Mother-Of-All generator
.IX:            resd    1           ; index into the SFMT state buffer

.AMASK:         resd    4           ; AND mask (aligned)
.STATE:         resd    SFMT_N*4    ; state vector (aligned)
endstruc                            ; CRandomSFMTA


; LOADOFFSET2ECX target
; Puts the offset of <target> into ecx.
; Without POSITIONINDEPENDENT defined this is a plain absolute "mov".
; With POSITIONINDEPENDENT defined the address is formed relative to the value
; that get_thunk_ecx leaves in ecx.
; NOTE(review): get_thunk_ecx is not defined in this file. The "add" below only
; yields <target> if it returns the address of the instruction after the call
; in ecx; confirm against its definition.
%macro LOADOFFSET2ECX 1
%ifdef POSITIONINDEPENDENT
        ; position-independent way of getting the address of the target
        call    get_thunk_ecx
        add     ecx, %1 - $             ; $ = address of this add instruction
%else
        mov     ecx, %1
%endif
%endmacro

; LOADOFFSET2EDI target
; Puts the offset of <target> into edi.
; Without POSITIONINDEPENDENT defined this is a plain absolute "mov".
; With POSITIONINDEPENDENT defined the address is formed relative to the value
; that get_thunk_edi leaves in edi.
; NOTE(review): get_thunk_edi is not defined in this file. The "add" below only
; yields <target> if it returns the address of the instruction after the call
; in edi; confirm against its definition.
%macro LOADOFFSET2EDI 1
%ifdef POSITIONINDEPENDENT
        ; position-independent way of getting the address of the target
        call    get_thunk_edi
        add     edi, %1 - $             ; $ = address of this add instruction
%else
        mov     edi, %1
%endif
%endmacro


; ***************************************************************************
; Define registers used for function parameters, used in 64-bit mode only
; ***************************************************************************
 
; parN  = 64-bit view of function parameter N
; parNd = 32-bit view of the same parameter
; If both WINDOWS and UNIX are defined, the UNIX definitions below take
; effect because they come last.

%ifdef WINDOWS
  ; Parameters 1-4 are in registers; parameter 5 is read from the stack.
  %define par1     rcx
  %define par1d    ecx
  %define par2     rdx
  %define par2d    edx
  %define par3     r8
  %define par3d    r8d
  %define par4     r9
  %define par4d    r9d
  ; Stack offset includes the shadow space.
  ; NOTE(review): the +8 presumably skips the return address, which would make
  ; this valid only while rsp still has its function-entry value; confirm at
  ; each use.
  %define par5     qword [rsp+32+8]
  %define par5d    dword [rsp+32+8]
%endif

%ifdef UNIX
  ; All five parameters are in registers.
  %define par1     rdi
  %define par1d    edi
  %define par2     rsi
  %define par2d    esi
  %define par3     rdx
  %define par3d    edx
  %define par4     rcx
  %define par4d    ecx
  %define par5     r8
  %define par5d    r8d
%endif