aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/asmlib/strlen64.asm
blob: ff65c101270467f5dd7c889caf1f9e6f7c44b110 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
%include "defs.asm"

;**************************  strlen64.asm  **********************************
; Author:           Agner Fog
; Date created:     2008-07-19
; Last modified:    2008-10-16
; Description:
; Faster version of the standard strlen function:
; size_t strlen(const char * str);
; Finds the length of a zero-terminated string of bytes, optimized for speed.
;
; Overriding standard function strlen:
; The alias ?OVR_strlen is changed to _strlen in the object file if
; it is desired to override the standard library function strlen.
;
; Calling conventions: 
; Stack alignment is not required. No shadow space or red zone used.
; Called internally from strcpy and strcat without stack aligned.
;
; Optimization:
; Uses XMM registers to read 16 bytes at a time, aligned.
; Misaligned parts of the string are read from the nearest preceding 16-byte
; boundary and the irrelevant part is masked out. It may read both before the
; beginning of the string and after the end, but it will never load any
; unnecessary cache line and never trigger a page fault for reading from
; non-existing memory pages, because it never reads past the nearest following
; 16-byte boundary.
; It may, though, trigger any debug watch within the same 16-byte block.
;
; The latest version of this file is available at:
; www.agner.org/optimize/asmexamples.zip
; Copyright (c) 2009 GNU General Public License www.gnu.org/licenses
;******************************************************************************

; Use RIP-relative addressing by default for memory operands.
default rel

; Export both names; they label the same entry point in this file.
global A_strlen: function              ; library-specific name of the function
global EXP(strlen): function           ; strlen as spelled by EXP(); EXP is not defined in this file (presumably in defs.asm)


; Code is placed in .text, with the section aligned to 16 bytes.
SECTION .text  align=16

;-----------------------------------------------------------------------
; size_t strlen(const char *s)         (also exported as A_strlen)
;
; In:    Windows x64: rcx = s          Unix x64: rdi = s
; Out:   rax = number of bytes in front of the first zero byte
; Uses:  rax, rcx, rdx, xmm0, xmm1, flags; additionally r8 on Windows
; Stack: nothing pushed or reserved; only ret reads the return address
;
; Method: round s down to a 16-byte boundary, test that aligned block
; with the bytes in front of s ignored, then walk forward one aligned
; 16-byte block at a time until a block contains a zero byte.
;-----------------------------------------------------------------------
A_strlen:
EXP(strlen):

%ifdef  WINDOWS
%define Rscopy   r8                    ; register that keeps the original s
        mov      rax,  rcx             ; rax = s (first argument, Windows)
        mov      r8,   rcx             ; preserve s: rcx is reduced to the misalignment below

%else   ; Unix
%define Rscopy   rdi                   ; rdi is never written here, so it keeps s
        mov      rax,  rdi             ; rax = s (first argument, Unix)
        mov      ecx,  edi             ; only the low 4 bits of s are needed in ecx
%endif

        ; Here: rax = s, low 4 bits of ecx = s mod 16
        pxor     xmm0, xmm0            ; xmm0 = 16 zero bytes to compare against
        and      ecx,  0x0F            ; ecx = offset of s inside its 16-byte block
        and      rax,  -16             ; rax = start of the aligned block containing s
        movdqa   xmm1, [rax]           ; aligned load; may include bytes in front of s
        pcmpeqb  xmm1, xmm0            ; 0xFF in every byte position that holds zero
        pmovmskb edx,  xmm1            ; edx bit i set <=> byte i of the block is zero
        shr      edx,  cl              ; drop the bits that belong to bytes before s ...
        shl      edx,  cl              ; ... and put the remaining bits back in place
        bsf      edx,  edx             ; edx = index of first zero byte; ZF=1 if there is none
        jnz      .found                ; terminator lies in the first block

        ; No terminator yet: examine the following aligned blocks in turn.
.scan:  add      rax,  16              ; advance to the next 16-byte block
        movdqa   xmm1, [rax]           ; aligned 16-byte load
        pcmpeqb  xmm1, xmm0            ; mark zero bytes
        pmovmskb edx,  xmm1            ; one mask bit per byte
        bsf      edx,  edx             ; index of first zero byte; ZF=1 if the mask is empty
        ; Design note from the original author: hoisting bsf out of the loop and
        ; testing the mask with test instead would be faster for long strings on
        ; old processors; short strings and newer processors are given priority.
        jz       .scan                 ; nothing in this block, keep going

.found: ; rax = block holding the terminator, rdx = its byte index in that block
        sub      rax,  Rscopy          ; block start minus s
        add      rax,  rdx             ; plus index inside the block = length
        ret
        
;A_strlen ENDP