diff options
author | pnv1 <pnv@ydb.tech> | 2023-04-27 19:15:07 +0300 |
---|---|---|
committer | pnv1 <pnv@ydb.tech> | 2023-04-27 19:15:07 +0300 |
commit | a66c59109292f9e0fb44ede41adfdebe569e4df3 (patch) | |
tree | 906b3d10274afd16e8e70c61ff416bff9075422e /contrib/libs/asmglibc | |
parent | 9ca91b40d6f45546e20a646d15590c0cc6cc9778 (diff) | |
download | ydb-a66c59109292f9e0fb44ede41adfdebe569e4df3.tar.gz |
Switch to old asmlib to be able to build ydb cli without sse4
Diffstat (limited to 'contrib/libs/asmglibc')
-rw-r--r-- | contrib/libs/asmglibc/CMakeLists.darwin-x86_64.txt | 13 | ||||
-rw-r--r-- | contrib/libs/asmglibc/CMakeLists.txt | 11 | ||||
-rw-r--r-- | contrib/libs/asmglibc/memchr.S | 330 | ||||
-rw-r--r-- | contrib/libs/asmglibc/sysdep.h | 12 |
4 files changed, 366 insertions, 0 deletions
diff --git a/contrib/libs/asmglibc/CMakeLists.darwin-x86_64.txt b/contrib/libs/asmglibc/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..e2b4e37fbb --- /dev/null +++ b/contrib/libs/asmglibc/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,13 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(contrib-libs-asmglibc) +target_sources(contrib-libs-asmglibc PRIVATE + ${CMAKE_SOURCE_DIR}/contrib/libs/asmglibc/memchr.S +) diff --git a/contrib/libs/asmglibc/CMakeLists.txt b/contrib/libs/asmglibc/CMakeLists.txt new file mode 100644 index 0000000000..661b6431cc --- /dev/null +++ b/contrib/libs/asmglibc/CMakeLists.txt @@ -0,0 +1,11 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +endif() diff --git a/contrib/libs/asmglibc/memchr.S b/contrib/libs/asmglibc/memchr.S new file mode 100644 index 0000000000..b0a51115c4 --- /dev/null +++ b/contrib/libs/asmglibc/memchr.S @@ -0,0 +1,330 @@ +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "sysdep.h" + +#ifdef USE_AS_WMEMCHR +# define MEMCHR wmemchr +# define PCMPEQ pcmpeqd +#else +# define MEMCHR memchr +# define PCMPEQ pcmpeqb +#endif + +/* fast SSE2 version with using pmaxub and 64 byte loop */ + + .text +ENTRY(MEMCHR) + movd %esi, %xmm1 + mov %edi, %ecx + +#ifdef USE_AS_WMEMCHR + test %rdx, %rdx + jz L(return_null) + shl $2, %rdx +#else + punpcklbw %xmm1, %xmm1 + test %rdx, %rdx + jz L(return_null) + punpcklbw %xmm1, %xmm1 +#endif + + and $63, %ecx + pshufd $0, %xmm1, %xmm1 + + cmp $48, %ecx + ja L(crosscache) + + movdqu (%rdi), %xmm0 + PCMPEQ %xmm1, %xmm0 + pmovmskb %xmm0, %eax + test %eax, %eax + + jnz L(matches_1) + sub $16, %rdx + jbe L(return_null) + add $16, %rdi + and $15, %ecx + and $-16, %rdi + add %rcx, %rdx + sub $64, %rdx + jbe L(exit_loop) + jmp L(loop_prolog) + + .p2align 4 +L(crosscache): + and $15, %ecx + and $-16, %rdi + movdqa (%rdi), %xmm0 + + PCMPEQ %xmm1, %xmm0 +/* Check if there is a match. */ + pmovmskb %xmm0, %eax +/* Remove the leading bytes. */ + sar %cl, %eax + test %eax, %eax + je L(unaligned_no_match) +/* Check which byte is a match. */ + bsf %eax, %eax + + sub %rax, %rdx + jbe L(return_null) + add %rdi, %rax + add %rcx, %rax + ret + + .p2align 4 +L(unaligned_no_match): + /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using + "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void + possible addition overflow. */ + neg %rcx + add $16, %rcx + sub %rcx, %rdx + jbe L(return_null) + add $16, %rdi + sub $64, %rdx + jbe L(exit_loop) + + .p2align 4 +L(loop_prolog): + movdqa (%rdi), %xmm0 + PCMPEQ %xmm1, %xmm0 + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches) + + movdqa 16(%rdi), %xmm2 + PCMPEQ %xmm1, %xmm2 + pmovmskb %xmm2, %eax + test %eax, %eax + jnz L(matches16) + + movdqa 32(%rdi), %xmm3 + PCMPEQ %xmm1, %xmm3 + pmovmskb %xmm3, %eax + test %eax, %eax + jnz L(matches32) + + movdqa 48(%rdi), %xmm4 + PCMPEQ %xmm1, %xmm4 + add $64, %rdi + pmovmskb %xmm4, %eax + test %eax, %eax + jnz L(matches0) + + test $0x3f, %rdi + jz L(align64_loop) + + sub $64, %rdx + jbe L(exit_loop) + + movdqa (%rdi), %xmm0 + PCMPEQ %xmm1, %xmm0 + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches) + + movdqa 16(%rdi), %xmm2 + PCMPEQ %xmm1, %xmm2 + pmovmskb %xmm2, %eax + test %eax, %eax + jnz L(matches16) + + movdqa 32(%rdi), %xmm3 + PCMPEQ %xmm1, %xmm3 + pmovmskb %xmm3, %eax + test %eax, %eax + jnz L(matches32) + + movdqa 48(%rdi), %xmm3 + PCMPEQ %xmm1, %xmm3 + pmovmskb %xmm3, %eax + + add $64, %rdi + test %eax, %eax + jnz L(matches0) + + mov %rdi, %rcx + and $-64, %rdi + and $63, %ecx + add %rcx, %rdx + + .p2align 4 +L(align64_loop): + sub $64, %rdx + jbe L(exit_loop) + movdqa (%rdi), %xmm0 + movdqa 16(%rdi), %xmm2 + movdqa 32(%rdi), %xmm3 + movdqa 48(%rdi), %xmm4 + + PCMPEQ %xmm1, %xmm0 + PCMPEQ %xmm1, %xmm2 + PCMPEQ %xmm1, %xmm3 + PCMPEQ %xmm1, %xmm4 + + pmaxub %xmm0, %xmm3 + pmaxub %xmm2, %xmm4 + pmaxub %xmm3, %xmm4 + pmovmskb %xmm4, %eax + + add $64, %rdi + + test %eax, %eax + jz L(align64_loop) + + sub $64, %rdi + + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches) + + pmovmskb %xmm2, %eax + test %eax, %eax + jnz L(matches16) + + movdqa 32(%rdi), %xmm3 + PCMPEQ %xmm1, %xmm3 + + PCMPEQ 48(%rdi), %xmm1 + pmovmskb %xmm3, %eax + test %eax, %eax + jnz L(matches32) + + pmovmskb %xmm1, %eax + bsf %eax, %eax + lea 48(%rdi, %rax), %rax + ret + + .p2align 4 +L(exit_loop): + add $32, %edx + jle L(exit_loop_32) + + movdqa (%rdi), %xmm0 + PCMPEQ %xmm1, %xmm0 + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches) + + movdqa 16(%rdi), %xmm2 + PCMPEQ %xmm1, %xmm2 + pmovmskb %xmm2, %eax + test %eax, %eax + jnz L(matches16) + + movdqa 32(%rdi), %xmm3 + PCMPEQ %xmm1, %xmm3 + pmovmskb %xmm3, %eax + test %eax, %eax + jnz L(matches32_1) + sub $16, %edx + jle L(return_null) + + PCMPEQ 48(%rdi), %xmm1 + pmovmskb %xmm1, %eax + test %eax, %eax + jnz L(matches48_1) + xor %eax, %eax + ret + + .p2align 4 +L(exit_loop_32): + add $32, %edx + movdqa (%rdi), %xmm0 + PCMPEQ %xmm1, %xmm0 + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches_1) + sub $16, %edx + jbe L(return_null) + + PCMPEQ 16(%rdi), %xmm1 + pmovmskb %xmm1, %eax + test %eax, %eax + jnz L(matches16_1) + xor %eax, %eax + ret + + .p2align 4 +L(matches0): + bsf %eax, %eax + lea -16(%rax, %rdi), %rax + ret + + .p2align 4 +L(matches): + bsf %eax, %eax + add %rdi, %rax + ret + + .p2align 4 +L(matches16): + bsf %eax, %eax + lea 16(%rax, %rdi), %rax + ret + + .p2align 4 +L(matches32): + bsf %eax, %eax + lea 32(%rax, %rdi), %rax + ret + + .p2align 4 +L(matches_1): + bsf %eax, %eax + sub %rax, %rdx + jbe L(return_null) + add %rdi, %rax + ret + + .p2align 4 +L(matches16_1): + bsf %eax, %eax + sub %rax, %rdx + jbe L(return_null) + lea 16(%rdi, %rax), %rax + ret + + .p2align 4 +L(matches32_1): + bsf %eax, %eax + sub %rax, %rdx + jbe L(return_null) + lea 32(%rdi, %rax), %rax + ret + + .p2align 4 +L(matches48_1): + bsf %eax, %eax + sub %rax, %rdx + jbe L(return_null) + lea 48(%rdi, %rax), %rax + ret + + .p2align 4 +L(return_null): + xor %eax, %eax + ret +END(MEMCHR) + +#ifndef USE_AS_WMEMCHR +strong_alias (memchr, __memchr) +libc_hidden_builtin_def(memchr) +#endif
\ No newline at end of file diff --git a/contrib/libs/asmglibc/sysdep.h b/contrib/libs/asmglibc/sysdep.h new file mode 100644 index 0000000000..1cfb71673e --- /dev/null +++ b/contrib/libs/asmglibc/sysdep.h @@ -0,0 +1,12 @@ +#if defined(__APPLE__) + #define ENTRY(X) .globl _## X; .align 1<<3; _ ## X: + #define END(X) + #define L(X) L ## X +#else + #define ENTRY(X) .globl X; .type X,@function; .align 1<<4; X: .cfi_startproc; + #define END(X) .cfi_endproc; .size X,.-X; + #define L(X) .L ## X +#endif + +#define libc_hidden_builtin_def(X) +#define strong_alias(X, Y) |