diff options
author | thegeorg <thegeorg@yandex-team.com> | 2024-05-31 19:32:06 +0300 |
---|---|---|
committer | thegeorg <thegeorg@yandex-team.com> | 2024-05-31 19:42:52 +0300 |
commit | 0d2f4a9edd041501cbbd09a76ade7a4b977e86af (patch) | |
tree | bfb9bf4e74848360f403ccd2d4f28431e020c1a9 | |
parent | 01178e24a1066fe3335a0b1442d1017ca82f1a0c (diff) | |
download | ydb-0d2f4a9edd041501cbbd09a76ade7a4b977e86af.tar.gz |
Update contrib/libs/lzma to 5.6.2
Keep "Update contrib/libs/lzma to 5.6.1" as an independent commit.
0cb9e331dfdcd6329e9a8211b4b89e280df9aa03
155 files changed, 4547 insertions, 1619 deletions
diff --git a/contrib/libs/lzma/AUTHORS b/contrib/libs/lzma/AUTHORS index 69bbfc3ef6..5eff238ae4 100644 --- a/contrib/libs/lzma/AUTHORS +++ b/contrib/libs/lzma/AUTHORS @@ -2,22 +2,20 @@ Authors of XZ Utils =================== - XZ Utils is developed and maintained by Lasse Collin - <lasse.collin@tukaani.org> and Jia Tan <jiat0218@gmail.com>. + XZ Utils is developed and maintained by + Lasse Collin <lasse.collin@tukaani.org>. Major parts of liblzma are based on code written by Igor Pavlov, specifically the LZMA SDK <https://7-zip.org/sdk.html>. Without this code, XZ Utils wouldn't exist. - The SHA-256 implementation in liblzma is based on the code found from - 7-Zip <https://7-zip.org/>, which has a modified version of the SHA-256 - code found from Crypto++ <https://www.cryptopp.com/>. The SHA-256 code - in Crypto++ was written by Kevin Springle and Wei Dai. + The SHA-256 implementation in liblzma is based on code written by + Wei Dai in Crypto++ Library <https://www.cryptopp.com/>. - Some scripts have been adapted from gzip. The original versions - were written by Jean-loup Gailly, Charles Levert, and Paul Eggert. - Andrew Dudman helped adapting the scripts and their man pages for - XZ Utils. + A few scripts have been adapted from GNU gzip. The original + versions were written by Jean-loup Gailly, Charles Levert, and + Paul Eggert. Andrew Dudman helped adapting the scripts and their + man pages for XZ Utils. The initial version of the threaded .xz decompressor was written by Sebastian Andrzej Siewior. @@ -25,15 +23,36 @@ Authors of XZ Utils The initial version of the .lz (lzip) decoder was written by Michał Górny. - CLMUL-accelerated CRC code was contributed by Ilya Kurdyukov. + Architecture-specific CRC optimizations were contributed by + Ilya Kurdyukov, Hans Jansen, and Chenxi Mao. Other authors: - Jonathan Nieder - Joachim Henke - The GNU Autotools-based build system contains files from many authors, - which I'm not trying to list here. + Special author: Jia Tan was a co-maintainer in 2022-2024. He and + the team behind him inserted a backdoor (CVE-2024-3094) into + XZ Utils 5.6.0 and 5.6.1 releases. He suddenly disappeared when + this was discovered. - Several people have contributed fixes or reported bugs. Most of them - are mentioned in the file THANKS. + Many people have contributed improvements or reported bugs. + Most of these people are mentioned in the file THANKS. + + The translations of the command line tools and man pages have been + contributed by many people via the Translation Project: + + - https://translationproject.org/domain/xz.html + - https://translationproject.org/domain/xz-man.html + + The authors of the translated man pages are in the header comments + of the man page files. In the source package, the authors of the + translations are in po/*.po and po4a/*.po files. + + Third-party code whose authors aren't listed here: + + - GNU getopt_long() in the 'lib' directory is included for + platforms that don't have a usable getopt_long(). + + - The build system files from GNU Autoconf, GNU Automake, + GNU Libtool, GNU Gettext, Autoconf Archive, and related files. diff --git a/contrib/libs/lzma/COPYING b/contrib/libs/lzma/COPYING index 4990c5f522..aed2153149 100644 --- a/contrib/libs/lzma/COPYING +++ b/contrib/libs/lzma/COPYING @@ -3,75 +3,81 @@ XZ Utils Licensing ================== Different licenses apply to different files in this package. Here - is a rough summary of which licenses apply to which parts of this - package (but check the individual files to be sure!): + is a summary of which licenses apply to which parts of this package: - - liblzma is in the public domain. + - liblzma is under the BSD Zero Clause License (0BSD). - - xz, xzdec, and lzmadec command line tools are in the public - domain unless GNU getopt_long had to be compiled and linked - in from the lib directory. The getopt_long code is under - GNU LGPLv2.1+. + - The command line tools xz, xzdec, lzmadec, and lzmainfo are + under 0BSD except that, on systems that don't have a usable + getopt_long, GNU getopt_long is compiled and linked in from the + 'lib' directory. The getopt_long code is under GNU LGPLv2.1+. - The scripts to grep, diff, and view compressed files have been - adapted from gzip. These scripts and their documentation are - under GNU GPLv2+. + adapted from GNU gzip. These scripts (xzgrep, xzdiff, xzless, + and xzmore) are under GNU GPLv2+. The man pages of the scripts + are under 0BSD; they aren't based on the man pages of GNU gzip. - - All the documentation in the doc directory and most of the - XZ Utils specific documentation files in other directories - are in the public domain. + - Most of the XZ Utils specific documentation that is in + plain text files (like README, INSTALL, PACKAGERS, NEWS, + and ChangeLog) are under 0BSD unless stated otherwise in + the file itself. The files xz-file-format.txt and + lzma-file-format.xt are in the public domain but may + be distributed under the terms of 0BSD too. - Note: The JavaScript files (under the MIT license) have - been removed from the Doxygen-generated HTML version of the - liblzma API documentation. Doxygen itself is under the GNU GPL - but the remaining files generated by Doxygen are not affected - by the licenses used in Doxygen because Doxygen licensing has - the following exception: + - Translated messages and man pages are under 0BSD except that + some old translations are in the public domain. - "Documents produced by doxygen are derivative works - derived from the input used in their production; - they are not affected by this license." + - Test files and test code in the 'tests' directory, and + debugging utilities in the 'debug' directory are under + the BSD Zero Clause License (0BSD). - - Translated messages are in the public domain. + - The GNU Autotools based build system contains files that are + under GNU GPLv2+, GNU GPLv3+, and a few permissive licenses. + These files don't affect the licensing of the binaries being + built. - - The build system contains public domain files, and files that - are under GNU GPLv2+ or GNU GPLv3+. None of these files end up - in the binaries being built. + - The 'extra' directory contains files that are under various + free software licenses. These aren't built or installed as + part of XZ Utils. - - Test files and test code in the tests directory, and debugging - utilities in the debug directory are in the public domain. + For the files under the BSD Zero Clause License (0BSD), if + a copyright notice is needed, the following is sufficient: - - The extra directory may contain public domain files, and files - that are under various free software licenses. + Copyright (C) The XZ Utils authors and contributors - You can do whatever you want with the files that have been put into - the public domain. If you find public domain legally problematic, - take the previous sentence as a license grant. If you still find - the lack of copyright legally problematic, you have too many - lawyers. - - As usual, this software is provided "as is", without any warranty. - - If you copy significant amounts of public domain code from XZ Utils + If you copy significant amounts of 0BSD-licensed code from XZ Utils into your project, acknowledging this somewhere in your software is polite (especially if it is proprietary, non-free software), but - naturally it is not legally required. Here is an example of a good - notice to put into "about box" or into documentation: + it is not legally required by the license terms. Here is an example + of a good notice to put into "about box" or into documentation: - This software includes code from XZ Utils - <https://xz.tukaani.org/xz-utils/>. + This software includes code from XZ Utils <https://tukaani.org/xz/>. The following license texts are included in the following files: + - COPYING.0BSD: BSD Zero Clause License - COPYING.LGPLv2.1: GNU Lesser General Public License version 2.1 - COPYING.GPLv2: GNU General Public License version 2 - COPYING.GPLv3: GNU General Public License version 3 - Note that the toolchain (compiler, linker etc.) may add some code - pieces that are copyrighted. Thus, it is possible that e.g. liblzma - binary wouldn't actually be in the public domain in its entirety - even though it contains no copyrighted code from the XZ Utils source - package. - - If you have questions, don't hesitate to ask the author(s) for more - information. + A note about old XZ Utils releases: + + XZ Utils releases 5.4.6 and older and 5.5.1alpha have a + significant amount of code put into the public domain and + that obviously remains so. The switch from public domain to + 0BSD for newer releases was made in Febrary 2024 because + public domain has (real or perceived) legal ambiguities in + some jurisdictions. + + There is very little *practical* difference between public + domain and 0BSD. The main difference likely is that one + shouldn't claim that 0BSD-licensed code is in the public + domain; 0BSD-licensed code is copyrighted but available under + an extremely permissive license. Neither 0BSD nor public domain + require retaining or reproducing author, copyright holder, or + license notices when distributing the software. (Compare to, + for example, BSD 2-Clause "Simplified" License which does have + such requirements.) + + If you have questions, don't hesitate to ask for more information. + The contact information is in the README file. diff --git a/contrib/libs/lzma/COPYING.0BSD b/contrib/libs/lzma/COPYING.0BSD new file mode 100644 index 0000000000..4322122aec --- /dev/null +++ b/contrib/libs/lzma/COPYING.0BSD @@ -0,0 +1,11 @@ +Permission to use, copy, modify, and/or distribute this +software for any purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL +THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR +CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, +NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/contrib/libs/lzma/INSTALL b/contrib/libs/lzma/INSTALL index 7ef2cb467a..ba187bbbb5 100644 --- a/contrib/libs/lzma/INSTALL +++ b/contrib/libs/lzma/INSTALL @@ -90,21 +90,15 @@ XZ Utils Installation 1.2.3. MINIX 3 - The default install of MINIX 3 includes Amsterdam Compiler Kit (ACK), - which doesn't support C99. Install GCC to compile XZ Utils. + Version 3.3.0 and later are supported. - MINIX 3.1.8 and older have bugs in /usr/include/stdint.h, which has - to be patched before XZ Utils can be compiled correctly. See - <http://gforge.cs.vu.nl/gf/project/minix/tracker/?action=TrackerItemEdit&tracker_item_id=537>. + Multithreading isn't supported because MINIX 3 doesn't have + pthreads. The option --disable-threads must be passed to configure + as this isn't autodetected. - MINIX 3.2.0 and later use a different libc and aren't affected by - the above bug. - - XZ Utils doesn't have code to detect the amount of physical RAM and - number of CPU cores on MINIX 3. - - See section 5.4 in this file about symbol visibility warnings (you - may want to pass gl_cv_cc_visibility=no to configure). + Note that disabling threads causes "make check" to show a few tests + as skipped ("SKIP"). It's only due to a few threading-dependent + subtests are skipped. See the matching tests/test_*.log files. 1.2.4. OpenVMS @@ -143,43 +137,37 @@ XZ Utils Installation 1.2.7. Windows - If it is enough to build liblzma (no command line tools): + The "windows" directory contains instructions for a few types + of builds: - - There is CMake support. It should be good enough to build - static liblzma or liblzma.dll with Visual Studio. The CMake - support may work with MinGW or MinGW-w64. Read the comment - in the beginning of CMakeLists.txt before running CMake! + - INSTALL-MinGW-w64_with_CMake.txt + Simple instructions how to build XZ Utils natively on + Windows using only CMake and a prebuilt toolchain + (GCC + MinGW-w64 or Clang/LLVM + MinGW-w64). - - There are Visual Studio project files under the "windows" - directory. See windows/INSTALL-MSVC.txt. In the future the - project files will be removed when CMake support is good - enough. Thus, please test the CMake version and help fix - possible issues. + - INSTALL-MinGW-w64_with_Autotools.txt + Native build under MSYS2 or cross-compilation from + GNU/Linux using a bash script that creates a .zip + and .7z archives of the binaries and documentation. + The related file README-Windows.txt is for the + resulting binary package. - To build also the command line tools: + - INSTALL-MSVC.txt + Building with MSVC / Visual Studio and CMake. - - MinGW-w64 + MSYS (32-bit and 64-bit x86): This is used - for building the official binary packages for Windows. - There is windows/build.bash to ease packaging XZ Utils with - MinGW(-w64) + MSYS into a redistributable .zip or .7z file. - See windows/INSTALL-MinGW.txt for more information. + - liblzma-crt-mixing.txt + Documentation what to take into account as a programmer + if liblzma.dll and the application don't use the same + CRT (MSVCRT or UCRT). - - MinGW + MSYS (32-bit x86): I haven't recently tested this. + Other choices: - - Cygwin 1.7.35 and later: NOTE that using XZ Utils >= 5.2.0 - under Cygwin older than 1.7.35 can lead to DATA LOSS! If - you must use an old Cygwin version, stick to XZ Utils 5.0.x - which is safe under older Cygwin versions. You can check - the Cygwin version with the command "cygcheck -V". + - Cygwin: https://cygwin.com/ + Building on Cygwin can be done like on many POSIX operating + systems. XZ Utils >= 5.2.0 isn't compatible with Cygwin older + than 1.7.35 (data loss!). 1.7.35 was released on 2015-03-04. - It may be possible to build liblzma with other toolchains too, but - that will probably require writing a separate makefile. Building - the command line tools with non-GNU toolchains will be harder than - building only liblzma. - - Even if liblzma is built with MinGW(-w64), the resulting DLL can - be used by other compilers and linkers, including MSVC. See - windows/README-Windows.txt for details. + - MSYS2: https://www.msys2.org/ 1.2.8. DOS @@ -358,21 +346,27 @@ XZ Utils Installation will still be installed. The $docdir can be changed with --docdir=DIR. + --enable-doxygen + Enable generation of the HTML version of the liblzma API + documentation using Doxygen. The resulting files are + installed to $docdir/api. This option assumes that + the 'doxygen' tool is available. + --disable-assembler - liblzma includes some assembler optimizations. Currently - there is only assembler code for CRC32 and CRC64 for - 32-bit x86. - - All the assembler code in liblzma is position-independent - code, which is suitable for use in shared libraries and - position-independent executables. So far only i386 - instructions are used, but the code is optimized for i686 - class CPUs. If you are compiling liblzma exclusively for + This disables CRC32 and CRC64 assembly code on + 32-bit x86. This option currently does nothing + on other architectures (not even on x86-64). + + The 32-bit x86 assembly is position-independent code + which is suitable for use in shared libraries and + position-independent executables. It uses only i386 + instructions but the code is optimized for i686 class + CPUs. If you are compiling liblzma exclusively for pre-i686 systems, you may want to disable the assembler code. --disable-clmul-crc - Disable the use carryless multiplication for CRC + Disable the use of carryless multiplication for CRC calculation even if compiler support for it is detected. The code uses runtime detection of SSSE3, SSE4.1, and CLMUL instructions on x86. On 32-bit x86 this currently @@ -383,6 +377,16 @@ XZ Utils Installation required extensions (-msse4.1 -mpclmul) then runtime detection isn't used and the generic code is omitted. + --disable-arm64-crc32 + Disable the use of the ARM64 CRC32 instruction extension + even if compiler support for it is detected. The code will + detect support for the instruction at runtime. + + If using compiler options that unconditionally allow the + required extensions (-march=armv8-a+crc or -march=armv8.1-a + and later) then runtime detection isn't used and the + generic code is omitted. + --enable-unaligned-access Allow liblzma to use unaligned memory access for 16-bit, 32-bit, and 64-bit loads and stores. This should be @@ -392,8 +396,43 @@ XZ Utils Installation slow. This option shouldn't be used on systems that rely on such emulation. - Unaligned access is enabled by default on x86, x86-64, - big endian PowerPC, some ARM, and some ARM64 systems. + Unaligned access is enabled by default on these: + - 32-bit x86 + - 64-bit x86-64 + - 32-bit big endian PowerPC + - 64-bit big endian PowerPC + - 64-bit little endian PowerPC + - some RISC-V [1] + - some 32-bit ARM [2] + - some 64-bit ARM64 [2] (NOTE: Autodetection bug + if using GCC -mstrict-align, see below.) + + [1] Unaligned access is enabled by default if + configure sees that the C compiler + #defines __riscv_misaligned_fast. + + [2] Unaligned access is enabled by default if + configure sees that the C compiler + #defines __ARM_FEATURE_UNALIGNED: + + - ARMv7 + GCC or Clang: It works. The options + -munaligned-access and -mno-unaligned-access + affect this macro correctly. + + - ARM64 + Clang: It works. The options + -munaligned-access, -mno-unaligned-access, + and -mstrict-align affect this macro correctly. + Clang >= 17 supports -mno-strict-align too. + + - ARM64 + GCC: It partially works. The macro + is always #defined by GCC versions at least + up to 13.2, even when using -mstrict-align. + If building for strict-align ARM64, the + configure option --disable-unaligned-access + should be used if using a GCC version that has + this issue because otherwise the performance + may be degraded. It likely won't crash due to + how unaligned access is done in the C code. --enable-unsafe-type-punning This enables use of code like @@ -460,14 +499,14 @@ XZ Utils Installation too. This is the default for 32-bit x86 Windows builds. Unless the compiler supports __attribute__((__constructor__)), - the `win95' threading is incompatible with + the 'win95' threading is incompatible with --enable-small. vista Use Windows Vista compatible threads. The resulting binaries won't run on Windows XP or older. This is the default for Windows excluding 32-bit x86 builds (that is, on - x86-64 the default is `vista'). + x86-64 the default is 'vista'). no Disable threading support. This is the same as using --disable-threads. @@ -480,13 +519,16 @@ XZ Utils Installation one thread, something bad may happen. --enable-sandbox=METHOD - There is limited sandboxing support in the xz tool. If - built with sandbox support, it's used automatically when - (de)compressing exactly one file to standard output and - the options --files or --files0 weren't used. This is a - common use case, for example, (de)compressing .tar.xz - files via GNU tar. The sandbox is also used for - single-file `xz --test' or `xz --list'. + There is limited sandboxing support in the xz and xzdec + tools. If built with sandbox support, xz uses it + automatically when (de)compressing exactly one file to + standard output when the options --files or --files0 aren't + used. This is a common use case, for example, + (de)compressing .tar.xz files via GNU tar. The sandbox is + also used for single-file 'xz --test' or 'xz --list'. + xzdec always uses the sandbox, except when more than one + file are decompressed. In this case it will enable the + sandbox for the last file that is decompressed. Supported METHODs: @@ -498,7 +540,7 @@ XZ Utils Installation no Disable sandboxing support. capsicum - Use Capsicum (FreeBSD >= 10) for + Use Capsicum (FreeBSD >= 10.2) for sandboxing. If no Capsicum support is found, configure will give an error. @@ -506,10 +548,50 @@ XZ Utils Installation sandboxing. If pledge(2) isn't found, configure will give an error. - --enable-symbol-versions - Use symbol versioning for liblzma. This is enabled by - default on GNU/Linux, other GNU-based systems, and - FreeBSD. + landlock + Use Landlock (Linux >= 5.13) for + sandboxing. If no Landlock support + is found, configure will give an error. + + --enable-symbol-versions[=VARIANT] + Use symbol versioning for liblzma shared library. + This is enabled by default on GNU/Linux (glibc only), + other GNU-based systems, and FreeBSD. + + Symbol versioning is never used for static liblzma. This + option is ignored when not building a shared library. + + Supported VARIANTs: + + no Disable symbol versioning. This is the + same as using --disable-symbol-versions. + + auto Autodetect between "no", "linux", + and "generic". + + yes Autodetect between "linux" and + "generic". This forces symbol + versioning to be used when + building a shared library. + + generic Generic version is the default for + FreeBSD and GNU/Linux on MicroBlaze. + + This is also used on GNU/Linux when + building with NVIDIA HPC Compiler + because the compiler doesn't support + the features required for the "linux" + variant below. + + linux Special version for GNU/Linux (glibc + only). This adds a few extra symbol + versions for compatibility with binaries + that have been linked against a liblzma + version that has been patched with + "xz-5.2.2-compat-libs.patch" from + RHEL/CentOS 7. That patch was used + by some build tools outside of + RHEL/CentOS 7 too. --enable-debug This enables the assert() macro and possibly some other diff --git a/contrib/libs/lzma/NEWS b/contrib/libs/lzma/NEWS index 5858fadded..77eab744f5 100644 --- a/contrib/libs/lzma/NEWS +++ b/contrib/libs/lzma/NEWS @@ -2,6 +2,544 @@ XZ Utils Release Notes ====================== +5.6.2 (2024-05-29) + + * Remove the backdoor (CVE-2024-3094). + + * Not changed: Memory sanitizer (MSAN) has a false positive + in the CRC CLMUL code which also makes OSS Fuzz unhappy. + Valgrind is smarter and doesn't complain. + + A revision to the CLMUL code is coming anyway and this issue + will be cleaned up as part of it. It won't be backported to + 5.6.x or 5.4.x because the old code isn't wrong. There is + no reason to risk introducing regressions in old branches + just to silence a false positive. + + * liblzma: + + - lzma_index_decoder() and lzma_index_buffer_decode(): Fix + a missing output pointer initialization (*i = NULL) if the + functions are called with invalid arguments. The API docs + say that such an initialization is always done. In practice + this matters very little because the problem can only occur + if the calling application has a bug and these functions + return LZMA_PROG_ERROR. + + - lzma_str_to_filters(): Fix a missing output pointer + initialization (*error_pos = 0). This is very similar + to the fix above. + + - Fix C standard conformance with function pointer types. + + - Remove GNU indirect function (IFUNC) support. This is *NOT* + done for security reasons even though the backdoor relied on + this code. The performance benefits of IFUNC are too tiny in + this project to make the extra complexity worth it. + + - FreeBSD on ARM64: Add error checking to CRC32 instruction + support detection. + + - Fix building with NVIDIA HPC SDK. + + * xz: + + - Fix a C standard conformance issue in --block-list parsing + (arithmetic on a null pointer). + + - Fix a warning from GNU groff when processing the man page: + "warning: cannot select font 'CW'" + + * xzdec: Add support for Linux Landlock ABI version 4. xz already + had the v3-to-v4 change but it had been forgotten from xzdec. + + * Autotools-based build system (configure): + + - Symbol versioning variant can now be overridden with + --enable-symbol-versions. Documentation in INSTALL was + updated to match. + + - Add new configure option --enable-doxygen to enable + generation and installation of the liblzma API documentation + using Doxygen. Documentation in INSTALL and PACKAGERS was + updated to match. + + CMake: + + - Fix detection of Linux Landlock support. The detection code + in CMakeLists.txt had been sabotaged. + + - Disable symbol versioning on non-glibc Linux to match what + the Autotools build does. For example, symbol versioning + isn't enabled with musl. + + - Symbol versioning variant can now be overridden by setting + SYMBOL_VERSIONING to "OFF", "generic", or "linux". + + - Add support for all tests in typical build configurations. + Now the only difference to the tests coverage to Autotools + is that CMake-based build will skip more tests if features + are disabled. Such builds are only for special cases like + embedded systems. + + - Separate the CMake code for the tests into tests/tests.cmake. + It is used conditionally, thus it is possible to + + rm -rf tests + + and the CMake-based build will still work normally except + that no tests are then available. + + - Add a option ENABLE_DOXYGEN to enable generation and + installation of the liblzma API documentation using Doxygen. + + * Documentation: + + - Omit the Doxygen-generated liblzma API documentation from the + package. Instead, the generation and installation of the API + docs can be enabled with a configure or CMake option if + Doxygen is available. + + - Remove the XZ logo which was used in the API documentation. + The logo has been retired and isn't used by the project + anymore. However, it's OK to use it in contexts that refer + to the backdoor incident. + + - Remove the PDF versions of the man pages from the source + package. These existed primarily for users of operating + systems which don't come with tools to render man page + source files. The plain text versions are still included + in doc/man/txt. PDF files can still be generated to doc/man, + if the required tools are available, using "make pdf" after + running "configure". + + - Update home page URLs back to their old locations on + tukaani.org. + + - Update maintainer info. + + * Tests: + + - In tests/files/README, explain how to recreate the ARM64 + test files. + + - Remove two tests that used tiny x86 and SPARC object files + as the input files. The matching .c file was included but + the object files aren't easy to reproduce. The test cases + weren't great anyway; they were from the early days (2009) + of the project when the test suite had very few tests. + + - Improve a few tests. + + +5.6.1 (2024-03-09) + + IMPORTANT: This fixed bugs in the backdoor (CVE-2024-3094) (someone + had forgot to run Valgrind). + + * liblzma: Fixed two bugs relating to GNU indirect function (IFUNC) + with GCC. The more serious bug caused a program linked with + liblzma to crash on start up if the flag -fprofile-generate was + used to build liblzma. The second bug caused liblzma to falsely + report an invalid write to Valgrind when loading liblzma. + + * xz: Changed the messages for thread reduction due to memory + constraints to only appear under the highest verbosity level. + + * Build: + + - Fixed a build issue when the header file <linux/landlock.h> + was present on the system but the Landlock system calls were + not defined in <sys/syscall.h>. + + - The CMake build now warns and disables NLS if both gettext + tools and pre-created .gmo files are missing. Previously, + this caused the CMake build to fail. + + * Minor improvements to man pages. + + * Minor improvements to tests. + + +5.6.0 (2024-02-24) + + IMPORTANT: This added a backdoor (CVE-2024-3094). It's enabled only + in the release tarballs. + + This bumps the minor version of liblzma because new features were + added. The API and ABI are still backward compatible with liblzma + 5.4.x and 5.2.x and 5.0.x. + + NOTE: As described in the NEWS for 5.5.2beta, the core components + are now under the BSD Zero Clause License (0BSD). + + Since 5.5.2beta: + + * liblzma: + + - Disabled the branchless C variant in the LZMA decoder based + on the benchmark results from the community. + + - Disabled x86-64 inline assembly on x32 to fix the build. + + * Sandboxing support in xz: + + - Landlock is now used even when xz needs to create files. + In this case the sandbox has to be more permissive than + when no files need to be created. A similar thing was + already in use with pledge(2) since 5.3.4alpha. + + - Landlock and pledge(2) are now stricter when reading from + more than one input file and only writing to standard output. + + - Added support for Landlock ABI version 4. + + * CMake: + + - Default to -O2 instead of -O3 with CMAKE_BUILD_TYPE=Release. + -O3 is not useful for speed and makes the code larger. + + - Now builds lzmainfo and lzmadec. + + - xzdiff, xzgrep, xzless, xzmore, and their symlinks are now + installed. The scripts are also tested during "make test". + + - Added translation support for xz, lzmainfo, and the + man pages. + + - Applied the symbol versioning workaround for MicroBlaze that + is used in the Autotools build. + + - The general XZ Utils and liblzma API documentation is now + installed. + + - The CMake component names were changed a little and several + were added. liblzma_Runtime and liblzma_Development are + unchanged. + + - Minimum required CMake version is now 3.14. However, + translation support is disabled with CMake versions + older than 3.20. + + - The CMake-based build is now close to feature parity with the + Autotools-based build. Most importantly a few tests aren't + run yet. Testing the CMake-based build on different operating + systems would be welcome now. See the comment at the top of + CMakeLists.txt. + + * Fixed a bug in the Autotools feature test for ARM64 CRC32 + instruction support for old versions of Clang. This did not + affect the CMake build. + + * Windows: + + - The build instructions in INSTALL and windows/INSTALL*.txt + were revised completely. + + - windows/build-with-cmake.bat along with the instructions + in windows/INSTALL-MinGW-w64_with_CMake.txt should make + it very easy to build liblzma.dll and xz.exe on Windows + using CMake and MinGW-w64 with either GCC or Clang/LLVM. + + - windows/build.bash was updated. It now works on MSYS2 and + on GNU/Linux (cross-compiling) to create a .zip and .7z + package for 32-bit and 64-bit x86 using GCC + MinGW-w64. + + * The TODO file is no longer installed as part of the + documentation. The file is out of date and does not reflect + the actual tasks that will be completed in the future. + + * Translations: + + - Translated lzmainfo man pages are now installed. These + had been forgotten in earlier versions. + + - Updated Croatian, Esperanto, German, Hungarian, Korean, + Polish, Romanian, Spanish, Swedish, Vietnamese, and Ukrainian + translations. + + - Updated German, Korean, Romanian, and Ukrainian man page + translations. + + * Added a few tests. + + Summary of new features added in the 5.5.x development releases: + + * liblzma: + + - LZMA decoder: Speed optimizations to the C code and + added GCC & Clang compatible inline assembly for x86-64. + + - Added lzma_mt_block_size() to recommend a Block size for + multithreaded encoding. + + - Added CLMUL-based CRC32 on x86-64 and E2K with runtime + processor detection. Similar to CRC64, on 32-bit x86 it + isn't available unless --disable-assembler is used. + + - Optimized the CRC32 calculation on ARM64 platforms using the + CRC32 instructions. Runtime detection for the instruction is + used on GNU/Linux, FreeBSD, Windows, and macOS. If the + compiler flags indicate unconditional CRC32 instruction + support (+crc) then the generic version is not built. + + - Added definitions of mask values like + LZMA_INDEX_CHECK_MASK_CRC32 to <lzma/index.h>. + + * xz: + + - Multithreaded mode is now the default. This improves + compression speed and creates .xz files that can be + decompressed in multithreaded mode. The downsides are + increased memory usage and slightly worse compression ratio. + + - Added a new command line option --filters to set the filter + chain using the liblzma filter string syntax. + + - Added new command line options --filters1 ... --filters9 to + set additional filter chains using the liblzma filter string + syntax. The --block-list option now allows specifying filter + chains that were set using these new options. + + - Ported the command line tools to Windows MSVC. + Visual Studio 2015 or later is required. + + * Added lz4 support to xzdiff/xzcmp and xzgrep. + + +5.5.2beta (2024-02-14) + + * Licensing change: The core components are now under the + BSD Zero Clause License (0BSD). In XZ Utils 5.4.6 and older + and 5.5.1alpha these components are in the public domain and + obviously remain so; the change affects the new releases only. + + 0BSD is an extremely permissive license which doesn't require + retaining or reproducing copyright or license notices when + distributing the code, thus in practice there is extremely + little difference to public domain. + + * liblzma + + - Significant speed optimizations to the LZMA decoder were + made. There are now three variants that can be chosen at + build time: + + * Basic C version: This is a few percent faster than + 5.4.x due to some new optimizations. + + * Branchless C: This is currently the default on platforms + for which there is no assembly code. This should be a few + percent faster than the basic C version. + + * x86-64 inline assembly. This works with GCC and Clang. + + The default choice can currently be overridden by setting + LZMA_RANGE_DECODER_CONFIG in CPPFLAGS: 0 means the basic + version and 3 means that branchless C version. + + - Optimized the CRC32 calculation on ARM64 platforms using the + CRC32 instructions. The instructions are optional in ARMv8.0 + and are required in ARMv8.1 and later. Runtime detection for + the instruction is used on GNU/Linux, FreeBSD, Windows, and + macOS. If the compiler flags indicate unconditional CRC32 + instruction support (+crc) then the generic version is not + built. + + * Added lz4 support to xzdiff/xzcmp and xzgrep. + + * Man pages of xzdiff/xzcmp, xzgrep, and xzmore were rewritten + to simplify licensing of the man page translations. + + * Translations: + + - Updated Chinese (simplified), German, Korean, Polish, + Romanian, Spanish, Swedish, and Ukrainian translations. + + - Updated German, Korean, Romanian, and Ukrainian man page + translations. + + * Small improvements to the tests. + + * Added doc/examples/11_file_info.c. It was added to the Git + repository in 2017 but forgotten to be added into distribution + tarballs. + + * Removed doc/examples_old. These were from 2012. + + * Removed the macos/build.sh script. It had not been updated + since 2013. + + +5.5.1alpha (2024-01-26) + + * Added a new filter for RISC-V binaries. The filter can be used + for 32-bit and 64-bit binaries with either little or big + endianness. In liblzma, the Filter ID is LZMA_FILTER_RISCV (0x0B) + and the xz option is --riscv. liblzma filter string syntax + recognizes this filter as "riscv". + + * liblzma: + + - Added lzma_mt_block_size() to recommend a Block size for + multithreaded encoding + + - Added CLMUL-based CRC32 on x86-64 and E2K with runtime + processor detection. Similar to CRC64, on 32-bit x86 it + isn't available unless --disable-assembler is used. + + - Implemented GNU indirect function (IFUNC) as a runtime + function dispatching method for CRC32 and CRC64 fast + implementations on x86. Only GNU/Linux (glibc) and FreeBSD + builds will use IFUNC, unless --enable-ifunc is specified to + configure. + + - Added definitions of mask values like + LZMA_INDEX_CHECK_MASK_CRC32 to <lzma/index.h>. + + - The XZ logo is now included in the Doxygen generated + documentation. It is licensed under Creative Commons + Attribution-ShareAlike 4.0. + + * xz: + + - Multithreaded mode is now the default. This improves + compression speed and creates .xz files that can be + decompressed multithreaded at the cost of increased memory + usage and slightly worse compression ratio. + + - Added new command line option --filters to set the filter + chain using liblzma filter string syntax. + + - Added new command line options --filters1 ... --filters9 to + set additional filter chains using liblzma filter string + syntax. The --block-list option now allows specifying filter + chains that were set using these new options. + + - Added support for Linux Landlock as a sandboxing method. + + - xzdec now supports pledge(2), Capsicum, and Linux Landlock as + sandboxing methods. + + - Progress indicator time stats remain accurate after pausing + xz with SIGTSTP. + + - Ported xz and xzdec to Windows MSVC. Visual Studio 2015 or + later is required. + + * CMake Build: + + - Supports pledge(2), Capsicum, and Linux Landlock sandboxing + methods. + + - Replacement functions for getopt_long() are used on platforms + that do not have it. + + * Enabled unaligned access by default on PowerPC64LE and on RISC-V + targets that define __riscv_misaligned_fast. + + * Tests: + + - Added two new fuzz targets to OSS-Fuzz. + + - Implemented Continuous Integration (CI) testing using + GitHub Actions. + + * Changed quoting style from `...' to '...' in all messages, + scripts, and documentation. + + * Added basic Codespell support to help catch typo errors. + + +5.4.7 (2024-05-29) + + * Not changed: Memory sanitizer (MSAN) has a false positive + in the CRC CLMUL code which also makes OSS Fuzz unhappy. + Valgrind is smarter and doesn't complain. + + A revision to the CLMUL code is coming anyway and this issue + will be cleaned up as part of it. It won't be backported to + 5.6.x or 5.4.x because the old code isn't wrong. There is + no reason to risk introducing regressions in old branches + just to silence a false positive. + + * liblzma: + + - lzma_index_decoder() and lzma_index_buffer_decode(): Fix + a missing output pointer initialization (*i = NULL) if the + functions are called with invalid arguments. The API docs + say that such an initialization is always done. In practice + this matters very little because the problem can only occur + if the calling application has a bug and these functions + return LZMA_PROG_ERROR. + + - lzma_str_to_filters(): Fix a missing output pointer + initialization (*error_pos = 0). This is very similar + to the fix above. + + - Fix C standard conformance with function pointer types. + This newly showed up with Clang 17 with -fsanitize=undefined. + There are no bug reports about this. + + - Fix building with NVIDIA HPC SDK. + + * xz: + + - Fix a C standard conformance issue in --block-list parsing + (arithmetic on a null pointer). + + - Fix a warning from GNU groff when processing the man page: + "warning: cannot select font 'CW'" + + - Fix outdated threading related information on the man page. + + * xzless: + + - With "less" version 451 and later, use "||-" instead of "|-" + in the environment variable LESSOPEN. This way compressed + files that contain no uncompressed data are shown correctly + as empty. + + - With "less" version 632 and later, use --show-preproc-errors + to make "less" show a warning on decompression errors. + + * Autotools-based build system (configure): + + - Symbol versioning variant can now be overridden with + --enable-symbol-versions. Documentation in INSTALL was + updated to match. + + CMake: + + - Linux on MicroBlaze is handled specially now. This matches + the changes made to the Autotools-based build in XZ Utils + 5.4.2 and 5.2.11. + + - Disable symbol versioning on non-glibc Linux to match what + the Autotools build does. For example, symbol versioning + isn't enabled with musl. + + - Symbol versioning variant can now be overridden by setting + SYMBOL_VERSIONING to "OFF", "generic", or "linux". + + * Documentation: + + - Clarify the description of --disable-assembler in INSTALL. + The option only affects 32-bit x86 assembly usage. + + - Add doc/examples/11_file_info.c. It was added to the + Git repository in 2017 but forgotten to be added into + distribution tarballs. + + - Don't install the TODO file as part of the documentation. + The file is out of date. + + - Update home page URLs back to their old locations on + tukaani.org. + + - Update maintainer info. + + 5.4.6 (2024-01-26) * Fixed a bug involving internal function pointers in liblzma not @@ -826,6 +1364,162 @@ XZ Utils Release Notes (FreeBSD >= 10). +5.2.13 (2024-05-29) + + * liblzma: + + - lzma_index_append(): Fix an assertion failure that could be + triggered by a large unpadded_size argument. It was verified + that there was no other bug than the assertion failure. + + - lzma_index_decoder() and lzma_index_buffer_decode(): Fix + a missing output pointer initialization (*i = NULL) if the + functions are called with invalid arguments. The API docs + say that such an initialization is always done. In practice + this matters very little because the problem can only occur + if the calling application has a bug and these functions + return LZMA_PROG_ERROR. + + - Fix C standard conformance with function pointer types. + This newly showed up with Clang 17 with -fsanitize=undefined. + There are no bug reports about this. + + - Fix building with NVIDIA HPC SDK. + + - Fix building with Windows Vista threads and --enable-small. + (CMake build doesn't support ENABLE_SMALL in XZ Utils 5.2.x.) + + * xz: + + - Fix a C standard conformance issue in --block-list parsing + (arithmetic on a null pointer). + + - Fix a warning from GNU groff when processing the man page: + "warning: cannot select font 'CW'" + + - Windows: Handle special files such as "con" or "nul". Earlier + the following wrote "foo" to the console and deleted the input + file "con_xz": + + echo foo | xz > con_xz + xz --suffix=_xz --decompress con_xz + + - Windows: Fix an issue that prevented reading from or writing + to non-terminal character devices like NUL. + + * xzless: + + - With "less" version 451 and later, use "||-" instead of "|-" + in the environment variable LESSOPEN. This way compressed + files that contain no uncompressed data are shown correctly + as empty. + + - With "less" version 632 and later, use --show-preproc-errors + to make "less" show a warning on decompression errors. + + * Build systems: + + - Add a new line to liblzma.pc for MSYS2 (Windows): + + Cflags.private: -DLZMA_API_STATIC + + When compiling code that will link against static liblzma, + the LZMA_API_STATIC macro needs to be defined on Windows. + + - Autotools (configure): + + * Symbol versioning variant can now be overridden with + --enable-symbol-versions. Documentation in INSTALL was + updated to match. + + - CMake: + + * Fix a bug that prevented other projects from including + liblzma multiple times using find_package(). + + * Fix a bug where configuring CMake multiple times resulted + in HAVE_CLOCK_GETTIME and HAVE_CLOCK_MONOTONIC not being + defined. + + * Fix the build with MinGW-w64-based Clang/LLVM 17. + llvm-windres now has more accurate GNU windres emulation + so the GNU windres workaround from 5.4.1 is needed with + llvm-windres version 17 too. + + * The import library on Windows is now properly named + "liblzma.dll.a" instead of "libliblzma.dll.a" + + * Add large file support by default for platforms that + need it to handle files larger than 2 GiB. This includes + MinGW-w64, even 64-bit builds. + + * Linux on MicroBlaze is handled specially now. This + matches the changes made to the Autotools-based build + in XZ Utils 5.4.2 and 5.2.11. + + * Disable symbol versioning on non-glibc Linux to match + what the Autotools build does. For example, symbol + versioning isn't enabled with musl. + + * Symbol versioning variant can now be overridden by + setting SYMBOL_VERSIONING to "OFF", "generic", or + "linux". + + * Documentation: + + - Clarify the description of --disable-assembler in INSTALL. + The option only affects 32-bit x86 assembly usage. + + - Don't install the TODO file as part of the documentation. + The file is out of date. + + - Update home page URLs back to their old locations on + tukaani.org. + + - Update maintainer info. + + +5.2.12 (2023-05-04) + + * Fixed a build system bug that prevented building liblzma as a + shared library when configured with --disable-threads. This bug + affected releases 5.2.6 to 5.2.11 and 5.4.0 to 5.4.2. + + * Include <intrin.h> for Windows intrinsic functions where they are + needed. This fixed a bug that prevented building liblzma using + clang-cl on Windows. + + * Minor update to the Croatian translation. The small change + applies to a string in both 5.2 and 5.4 branches. + + +5.2.11 (2023-03-18) + + * Removed all possible cases of null pointer + 0. It is undefined + behavior in C99 and C17. This was detected by a sanitizer and had + not caused any known issues. + + * Build systems: + + - Added a workaround for building with GCC on MicroBlaze Linux. + GCC 12 on MicroBlaze doesn't support the __symver__ attribute + even though __has_attribute(__symver__) returns true. The + build is now done without the extra RHEL/CentOS 7 symbols + that were added in XZ Utils 5.2.7. The workaround only + applies to the Autotools build (not CMake). + + - CMake: Ensure that the C compiler language is set to C99 or + a newer standard. + + - CMake changes from XZ Utils 5.4.1: + + * Added a workaround for a build failure with + windres from GNU binutils. + + * Included the Windows resource files in the xz + and xzdec build rules. + + 5.2.10 (2022-12-13) * xz: Don't modify argv[] when parsing the --memlimit* and diff --git a/contrib/libs/lzma/README b/contrib/libs/lzma/README index 9a29c1b217..9d097deff3 100644 --- a/contrib/libs/lzma/README +++ b/contrib/libs/lzma/README @@ -67,24 +67,25 @@ XZ Utils 1.1. Overall documentation - README This file - - INSTALL.generic Generic install instructions for those not familiar - with packages using GNU Autotools - INSTALL Installation instructions specific to XZ Utils - PACKAGERS Information to packagers of XZ Utils - - COPYING XZ Utils copyright and license information - COPYING.GPLv2 GNU General Public License version 2 - COPYING.GPLv3 GNU General Public License version 3 - COPYING.LGPLv2.1 GNU Lesser General Public License version 2.1 - - AUTHORS The main authors of XZ Utils - THANKS Incomplete list of people who have helped making - this software - NEWS User-visible changes between XZ Utils releases - ChangeLog Detailed list of changes (commit log) - TODO Known bugs and some sort of to-do list + README This file + + INSTALL.generic Generic install instructions for those not + familiar with packages using GNU Autotools + INSTALL Installation instructions specific to XZ Utils + PACKAGERS Information to packagers of XZ Utils + + COPYING XZ Utils copyright and license information + COPYING.0BSD BSD Zero Clause License + COPYING.GPLv2 GNU General Public License version 2 + COPYING.GPLv3 GNU General Public License version 3 + COPYING.LGPLv2.1 GNU Lesser General Public License version 2.1 + + AUTHORS The main authors of XZ Utils + THANKS Incomplete list of people who have helped making + this software + NEWS User-visible changes between XZ Utils releases + ChangeLog Detailed list of changes (commit log) + TODO Known bugs and some sort of to-do list Note that only some of the above files are included in binary packages. @@ -94,9 +95,9 @@ XZ Utils The command-line tools are documented as man pages. In source code releases (and possibly also in some binary packages), the man pages - are also provided in plain text (ASCII only) and PDF formats in the - directory "doc/man" to make the man pages more accessible to those - whose operating system doesn't provide an easy way to view man pages. + are also provided in plain text (ASCII only) format in the directory + "doc/man" to make the man pages more accessible to those whose + operating system doesn't provide an easy way to view man pages. 1.3. Documentation for liblzma @@ -287,21 +288,23 @@ XZ Utils XZ Embedded is a limited implementation written for use in the Linux kernel, but it is also suitable for other embedded use. - https://xz.tukaani.org/xz-embedded/ + https://tukaani.org/xz/embedded.html XZ for Java is a complete implementation written in pure Java. - https://xz.tukaani.org/xz-for-java/ + https://tukaani.org/xz/java.html 6. Contact information ---------------------- - If you have questions, bug reports, patches etc. related to XZ Utils, - the project maintainers Lasse Collin and Jia Tan can be reached via - <xz@tukaani.org>. + XZ Utils in general: + - Home page: https://tukaani.org/xz/ + - Email to maintainer(s): xz@tukaani.org + - IRC: #tukaani on Libera Chat + - GitHub: https://github.com/tukaani-project/xz - You might find Lasse also from #tukaani on Libera Chat (IRC). - The nick is Larhzu. The channel tends to be pretty quiet, - so just ask your question and someone might wake up. + Lead maintainer: + - Email: Lasse Collin <lasse.collin@tukaani.org> + - IRC: Larhzu on Libera Chat diff --git a/contrib/libs/lzma/common/mythread.h b/contrib/libs/lzma/common/mythread.h index 4495e017b2..10ea2d42c2 100644 --- a/contrib/libs/lzma/common/mythread.h +++ b/contrib/libs/lzma/common/mythread.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file mythread.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef MYTHREAD_H @@ -112,6 +111,25 @@ mythread_sigmask(int how, const sigset_t *restrict set, # include <sys/time.h> #endif +// MinGW-w64 with winpthreads: +// +// NOTE: Typical builds with MinGW-w64 don't use this code (MYTHREAD_POSIX). +// Instead, native Windows threading APIs are used (MYTHREAD_VISTA or +// MYTHREAD_WIN95). +// +// MinGW-w64 has _sigset_t (an integer type) in <sys/types.h>. +// If _POSIX was #defined, the header would add the alias sigset_t too. +// Let's keep this working even without _POSIX. +// +// There are no functions that actually do something with sigset_t +// because signals barely exist on Windows. The sigfillset macro below +// is just to silence warnings. There is no sigfillset() in MinGW-w64. +#ifdef __MINGW32__ +# include <sys/types.h> +# define sigset_t _sigset_t +# define sigfillset(set_ptr) do { *(set_ptr) = 0; } while (0) +#endif + #define MYTHREAD_RET_TYPE void * #define MYTHREAD_RET_VALUE NULL @@ -140,11 +158,13 @@ typedef struct timespec mythread_condtime; // Use pthread_sigmask() to set the signal mask in multi-threaded programs. // Do nothing on OpenVMS since it lacks pthread_sigmask(). +// Do nothing on MinGW-w64 too to silence warnings (its pthread_sigmask() +// is #defined to 0 so it's a no-op). static inline void mythread_sigmask(int how, const sigset_t *restrict set, sigset_t *restrict oset) { -#ifdef __VMS +#if defined(__VMS) || defined(__MINGW32__) (void)how; (void)set; (void)oset; @@ -180,7 +200,7 @@ mythread_join(mythread thread) } -// Initiatlizes a mutex. Returns zero on success and non-zero on error. +// Initializes a mutex. Returns zero on success and non-zero on error. static inline int mythread_mutex_init(mythread_mutex *mutex) { diff --git a/contrib/libs/lzma/common/sysdefs.h b/contrib/libs/lzma/common/sysdefs.h index f04e45dd21..5f3785b513 100644 --- a/contrib/libs/lzma/common/sysdefs.h +++ b/contrib/libs/lzma/common/sysdefs.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file sysdefs.h @@ -8,9 +10,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_SYSDEFS_H @@ -159,13 +158,16 @@ typedef unsigned char _Bool; #include <string.h> -// As of MSVC 2013, inline and restrict are supported with -// non-standard keywords. -#if defined(_WIN32) && defined(_MSC_VER) -# ifndef inline +// Visual Studio 2013 update 2 supports only __inline, not inline. +// MSVC v19.0 / VS 2015 and newer support both. +// +// MSVC v19.27 (VS 2019 version 16.7) added support for restrict. +// Older ones support only __restrict. +#ifdef _MSC_VER +# if _MSC_VER < 1900 && !defined(inline) # define inline __inline # endif -# ifndef restrict +# if _MSC_VER < 1927 && !defined(restrict) # define restrict __restrict # endif #endif diff --git a/contrib/libs/lzma/common/tuklib_common.h b/contrib/libs/lzma/common/tuklib_common.h index b1f531ea4a..7554dfc86f 100644 --- a/contrib/libs/lzma/common/tuklib_common.h +++ b/contrib/libs/lzma/common/tuklib_common.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file tuklib_common.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef TUKLIB_COMMON_H @@ -57,8 +56,28 @@ # define TUKLIB_GNUC_REQ(major, minor) 0 #endif -#if TUKLIB_GNUC_REQ(2, 5) +// tuklib_attr_noreturn attribute is used to mark functions as non-returning. +// We cannot use "noreturn" as the macro name because then C23 code that +// uses [[noreturn]] would break as it would expand to [[ [[noreturn]] ]]. +// +// tuklib_attr_noreturn must be used at the beginning of function declaration +// to work in all cases. The [[noreturn]] syntax is the most limiting, it +// must be even before any GNU C's __attribute__ keywords: +// +// tuklib_attr_noreturn +// __attribute__((nonnull(1))) +// extern void foo(const char *s); +// +// FIXME: Update __STDC_VERSION__ for the final C23 version. 202000 is used +// by GCC 13 and Clang 15 with -std=c2x. +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000 +# define tuklib_attr_noreturn [[noreturn]] +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112 +# define tuklib_attr_noreturn _Noreturn +#elif TUKLIB_GNUC_REQ(2, 5) # define tuklib_attr_noreturn __attribute__((__noreturn__)) +#elif defined(_MSC_VER) +# define tuklib_attr_noreturn __declspec(noreturn) #else # define tuklib_attr_noreturn #endif diff --git a/contrib/libs/lzma/common/tuklib_config.h b/contrib/libs/lzma/common/tuklib_config.h index 9d470ba732..b27251dc27 100644 --- a/contrib/libs/lzma/common/tuklib_config.h +++ b/contrib/libs/lzma/common/tuklib_config.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + // If config.h isn't available, assume that the headers required by // tuklib_common.h are available. This is required by crc32_tablegen.c. #ifdef HAVE_CONFIG_H diff --git a/contrib/libs/lzma/common/tuklib_cpucores.c b/contrib/libs/lzma/common/tuklib_cpucores.c index bb3f2f752b..c4a781ac38 100644 --- a/contrib/libs/lzma/common/tuklib_cpucores.c +++ b/contrib/libs/lzma/common/tuklib_cpucores.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file tuklib_cpucores.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "tuklib_cpucores.h" diff --git a/contrib/libs/lzma/common/tuklib_cpucores.h b/contrib/libs/lzma/common/tuklib_cpucores.h index be1ce1c175..edff9395e4 100644 --- a/contrib/libs/lzma/common/tuklib_cpucores.h +++ b/contrib/libs/lzma/common/tuklib_cpucores.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file tuklib_cpucores.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef TUKLIB_CPUCORES_H diff --git a/contrib/libs/lzma/common/tuklib_integer.h b/contrib/libs/lzma/common/tuklib_integer.h index 3fb797ee2a..8230a71229 100644 --- a/contrib/libs/lzma/common/tuklib_integer.h +++ b/contrib/libs/lzma/common/tuklib_integer.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file tuklib_integer.h @@ -14,7 +16,7 @@ /// /// Endianness-converting integer operations (these can be macros!) /// (XX = 16, 32, or 64; Y = b or l): -/// - Byte swapping: bswapXX(num) +/// - Byte swapping: byteswapXX(num) /// - Byte order conversions to/from native (byteswaps if Y isn't /// the native endianness): convXXYe(num) /// - Unaligned reads: readXXYe(ptr) @@ -37,9 +39,6 @@ // Authors: Lasse Collin // Joachim Henke // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef TUKLIB_INTEGER_H @@ -67,38 +66,41 @@ #if defined(HAVE___BUILTIN_BSWAPXX) // GCC >= 4.8 and Clang -# define bswap16(n) __builtin_bswap16(n) -# define bswap32(n) __builtin_bswap32(n) -# define bswap64(n) __builtin_bswap64(n) +# define byteswap16(num) __builtin_bswap16(num) +# define byteswap32(num) __builtin_bswap32(num) +# define byteswap64(num) __builtin_bswap64(num) #elif defined(HAVE_BYTESWAP_H) // glibc, uClibc, dietlibc # include <byteswap.h> # ifdef HAVE_BSWAP_16 -# define bswap16(num) bswap_16(num) +# define byteswap16(num) bswap_16(num) # endif # ifdef HAVE_BSWAP_32 -# define bswap32(num) bswap_32(num) +# define byteswap32(num) bswap_32(num) # endif # ifdef HAVE_BSWAP_64 -# define bswap64(num) bswap_64(num) +# define byteswap64(num) bswap_64(num) # endif #elif defined(HAVE_SYS_ENDIAN_H) // *BSDs and Darwin # include <sys/endian.h> +# define byteswap16(num) bswap16(num) +# define byteswap32(num) bswap32(num) +# define byteswap64(num) bswap64(num) #elif defined(HAVE_SYS_BYTEORDER_H) // Solaris # error #include <sys/byteorder.h> # ifdef BSWAP_16 -# define bswap16(num) BSWAP_16(num) +# define byteswap16(num) BSWAP_16(num) # endif # ifdef BSWAP_32 -# define bswap32(num) BSWAP_32(num) +# define byteswap32(num) BSWAP_32(num) # endif # ifdef BSWAP_64 -# define bswap64(num) BSWAP_64(num) +# define byteswap64(num) BSWAP_64(num) # endif # ifdef BE_16 # define conv16be(num) BE_16(num) @@ -120,15 +122,15 @@ # endif #endif -#ifndef bswap16 -# define bswap16(n) (uint16_t)( \ +#ifndef byteswap16 +# define byteswap16(n) (uint16_t)( \ (((n) & 0x00FFU) << 8) \ | (((n) & 0xFF00U) >> 8) \ ) #endif -#ifndef bswap32 -# define bswap32(n) (uint32_t)( \ +#ifndef byteswap32 +# define byteswap32(n) (uint32_t)( \ (((n) & UINT32_C(0x000000FF)) << 24) \ | (((n) & UINT32_C(0x0000FF00)) << 8) \ | (((n) & UINT32_C(0x00FF0000)) >> 8) \ @@ -136,8 +138,8 @@ ) #endif -#ifndef bswap64 -# define bswap64(n) (uint64_t)( \ +#ifndef byteswap64 +# define byteswap64(n) (uint64_t)( \ (((n) & UINT64_C(0x00000000000000FF)) << 56) \ | (((n) & UINT64_C(0x000000000000FF00)) << 40) \ | (((n) & UINT64_C(0x0000000000FF0000)) << 24) \ @@ -161,23 +163,23 @@ # define conv64be(num) ((uint64_t)(num)) # endif # ifndef conv16le -# define conv16le(num) bswap16(num) +# define conv16le(num) byteswap16(num) # endif # ifndef conv32le -# define conv32le(num) bswap32(num) +# define conv32le(num) byteswap32(num) # endif # ifndef conv64le -# define conv64le(num) bswap64(num) +# define conv64le(num) byteswap64(num) # endif #else # ifndef conv16be -# define conv16be(num) bswap16(num) +# define conv16be(num) byteswap16(num) # endif # ifndef conv32be -# define conv32be(num) bswap32(num) +# define conv32be(num) byteswap32(num) # endif # ifndef conv64be -# define conv64be(num) bswap64(num) +# define conv64be(num) byteswap64(num) # endif # ifndef conv16le # define conv16le(num) ((uint16_t)(num)) @@ -625,7 +627,7 @@ write64le(uint8_t *buf, uint64_t num) // aligned but some compilers have language extensions to do that. With // such language extensions the memcpy() method gives excellent results. // -// What to do on a strict-align system when no known language extentensions +// What to do on a strict-align system when no known language extensions // are available? Falling back to byte-by-byte access would be safe but ruin // optimizations that have been made specifically with aligned access in mind. // As a compromise, aligned reads will fall back to non-compliant type punning diff --git a/contrib/libs/lzma/common/tuklib_physmem.c b/contrib/libs/lzma/common/tuklib_physmem.c index af03ed8e65..0a4fcb3871 100644 --- a/contrib/libs/lzma/common/tuklib_physmem.c +++ b/contrib/libs/lzma/common/tuklib_physmem.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file tuklib_physmem.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "tuklib_physmem.h" @@ -73,23 +72,20 @@ #endif -// With GCC >= 8.1 with -Wextra and Clang >= 13 with -Wcast-function-type -// will warn about the Windows-specific code. -#if defined(__has_warning) -# if __has_warning("-Wcast-function-type") -# define CAN_DISABLE_WCAST_FUNCTION_TYPE 1 -# endif -#elif TUKLIB_GNUC_REQ(8,1) -# define CAN_DISABLE_WCAST_FUNCTION_TYPE 1 -#endif - - extern uint64_t tuklib_physmem(void) { uint64_t ret = 0; #if defined(_WIN32) || defined(__CYGWIN__) + // This requires Windows 2000 or later. + MEMORYSTATUSEX meminfo; + meminfo.dwLength = sizeof(meminfo); + if (GlobalMemoryStatusEx(&meminfo)) + ret = meminfo.ullTotalPhys; + +/* + // Old version that is compatible with even Win95: if ((GetVersion() & 0xFF) >= 5) { // Windows 2000 and later have GlobalMemoryStatusEx() which // supports reporting values greater than 4 GiB. To keep the @@ -125,6 +121,7 @@ tuklib_physmem(void) GlobalMemoryStatus(&meminfo); ret = meminfo.dwTotalPhys; } +*/ #elif defined(__OS2__) unsigned long mem; diff --git a/contrib/libs/lzma/common/tuklib_physmem.h b/contrib/libs/lzma/common/tuklib_physmem.h index 09e2a51338..f35bfbab9c 100644 --- a/contrib/libs/lzma/common/tuklib_physmem.h +++ b/contrib/libs/lzma/common/tuklib_physmem.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file tuklib_physmem.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef TUKLIB_PHYSMEM_H diff --git a/contrib/libs/lzma/liblzma/api/lzma.h b/contrib/libs/lzma/liblzma/api/lzma.h index f6b9469a3d..6ca6e503d8 100644 --- a/contrib/libs/lzma/liblzma/api/lzma.h +++ b/contrib/libs/lzma/liblzma/api/lzma.h @@ -1,31 +1,30 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file api/lzma.h * \brief The public API of liblzma data compression library * \mainpage * - * liblzma is a public domain general-purpose data compression library with - * a zlib-like API. The native file format is .xz, but also the old .lzma - * format and raw (no headers) streams are supported. Multiple compression - * algorithms (filters) are supported. Currently LZMA2 is the primary filter. + * liblzma is a general-purpose data compression library with a zlib-like API. + * The native file format is .xz, but also the old .lzma format and raw (no + * headers) streams are supported. Multiple compression algorithms (filters) + * are supported. Currently LZMA2 is the primary filter. * - * liblzma is part of XZ Utils <https://xz.tukaani.org/xz-utils/>. XZ Utils + * liblzma is part of XZ Utils <https://tukaani.org/xz/>. XZ Utils * includes a gzip-like command line tool named xz and some other tools. - * XZ Utils is developed and maintained by Lasse Collin and Jia Tan. + * XZ Utils is developed and maintained by Lasse Collin. + * + * Major parts of liblzma are based on code written by Igor Pavlov, + * specifically the LZMA SDK <https://7-zip.org/sdk.html>. * - * Major parts of liblzma are based on Igor Pavlov's public domain LZMA SDK - * <https://7-zip.org/sdk.html>. + * The SHA-256 implementation in liblzma is based on code written by + * Wei Dai in Crypto++ Library <https://www.cryptopp.com/>. * - * The SHA-256 implementation is based on the public domain code found from - * 7-Zip <https://7-zip.org/>, which has a modified version of the public - * domain SHA-256 code found from Crypto++ <https://www.cryptopp.com/>. - * The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai. + * liblzma is distributed under the BSD Zero Clause License (0BSD). */ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H diff --git a/contrib/libs/lzma/liblzma/api/lzma/base.h b/contrib/libs/lzma/liblzma/api/lzma/base.h index 75cdd72acf..590e1d22bb 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/base.h +++ b/contrib/libs/lzma/liblzma/api/lzma/base.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/base.h * \brief Data types and functions used in many places in liblzma API @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL @@ -21,8 +20,8 @@ * * This is here because C89 doesn't have stdbool.h. To set a value for * variables having type lzma_bool, you can use - * - C99's `true' and `false' from stdbool.h; - * - C++'s internal `true' and `false'; or + * - C99's 'true' and 'false' from stdbool.h; + * - C++'s internal 'true' and 'false'; or * - integers one (true) and zero (false). */ typedef unsigned char lzma_bool; @@ -258,7 +257,7 @@ typedef enum { */ /* - * These eumerations may be used internally by liblzma + * These enumerations may be used internally by liblzma * but they will never be returned to applications. */ LZMA_RET_INTERNAL1 = 101, @@ -273,13 +272,13 @@ typedef enum { /** - * \brief The `action' argument for lzma_code() + * \brief The 'action' argument for lzma_code() * * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, LZMA_FULL_BARRIER, - * or LZMA_FINISH, the same `action' must be used until lzma_code() returns + * or LZMA_FINISH, the same 'action' must be used until lzma_code() returns * LZMA_STREAM_END. Also, the amount of input (that is, strm->avail_in) must * not be modified by the application until lzma_code() returns - * LZMA_STREAM_END. Changing the `action' or modifying the amount of input + * LZMA_STREAM_END. Changing the 'action' or modifying the amount of input * will make lzma_code() return LZMA_PROG_ERROR. */ typedef enum { @@ -393,8 +392,8 @@ typedef enum { * Single-threaded mode only: liblzma doesn't make an internal copy of * lzma_allocator. Thus, it is OK to change these function pointers in * the middle of the coding process, but obviously it must be done - * carefully to make sure that the replacement `free' can deallocate - * memory allocated by the earlier `alloc' function(s). + * carefully to make sure that the replacement 'free' can deallocate + * memory allocated by the earlier 'alloc' function(s). * * Multithreaded mode: liblzma might internally store pointers to the * lzma_allocator given via the lzma_stream structure. The application @@ -422,7 +421,7 @@ typedef struct { * liblzma never sets this to zero. * * \return Pointer to the beginning of a memory block of - * `size' bytes, or NULL if allocation fails + * 'size' bytes, or NULL if allocation fails * for some reason. When allocation fails, functions * of liblzma return LZMA_MEM_ERROR. * @@ -622,7 +621,7 @@ typedef struct { * to and get output from liblzma. * * See the description of the coder-specific initialization function to find - * out what `action' values are supported by the coder. + * out what 'action' values are supported by the coder. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. diff --git a/contrib/libs/lzma/liblzma/api/lzma/bcj.h b/contrib/libs/lzma/liblzma/api/lzma/bcj.h index 0c84e0cff9..7f6611feb3 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/bcj.h +++ b/contrib/libs/lzma/liblzma/api/lzma/bcj.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/bcj.h * \brief Branch/Call/Jump conversion filters @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL @@ -53,6 +52,11 @@ */ #define LZMA_FILTER_ARM64 LZMA_VLI_C(0x0A) +/** + * \brief Filter for RISC-V binaries + */ +#define LZMA_FILTER_RISCV LZMA_VLI_C(0x0B) + /** * \brief Options for BCJ filters diff --git a/contrib/libs/lzma/liblzma/api/lzma/block.h b/contrib/libs/lzma/liblzma/api/lzma/block.h index ec5e77a69a..05b77e59aa 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/block.h +++ b/contrib/libs/lzma/liblzma/api/lzma/block.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/block.h * \brief .xz Block handling @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL diff --git a/contrib/libs/lzma/liblzma/api/lzma/check.h b/contrib/libs/lzma/liblzma/api/lzma/check.h index b37197d2c7..e7a50ed3a3 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/check.h +++ b/contrib/libs/lzma/liblzma/api/lzma/check.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/check.h * \brief Integrity checks @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL diff --git a/contrib/libs/lzma/liblzma/api/lzma/container.h b/contrib/libs/lzma/liblzma/api/lzma/container.h index 2849fbfd3c..8e4af42038 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/container.h +++ b/contrib/libs/lzma/liblzma/api/lzma/container.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/container.h * \brief File formats @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL @@ -297,7 +296,7 @@ extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset) * to call lzma_end() after failed initialization. * * If initialization succeeds, use lzma_code() to do the actual encoding. - * Valid values for `action' (the second argument of lzma_code()) are + * Valid values for 'action' (the second argument of lzma_code()) are * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future, * there may be compression levels or flags that don't support LZMA_SYNC_FLUSH. * @@ -436,6 +435,34 @@ extern LZMA_API(lzma_ret) lzma_stream_encoder_mt( /** + * \brief Calculate recommended Block size for multithreaded .xz encoder + * + * This calculates a recommended Block size for multithreaded encoding given + * a filter chain. This is used internally by lzma_stream_encoder_mt() to + * determine the Block size if the block_size member is not set to the + * special value of 0 in the lzma_mt options struct. + * + * If one wishes to change the filters between Blocks, this function is + * helpful to set the block_size member of the lzma_mt struct before calling + * lzma_stream_encoder_mt(). Since the block_size member represents the + * maximum possible Block size for the multithreaded .xz encoder, one can + * use this function to find the maximum recommended Block size based on + * all planned filter chains. Otherwise, the multithreaded encoder will + * base its maximum Block size on the first filter chain used (if the + * block_size member is not set), which may unnecessarily limit the Block + * size for a later filter chain. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Recommended Block size in bytes, or UINT64_MAX if + * an error occurred. + */ +extern LZMA_API(uint64_t) lzma_mt_block_size(const lzma_filter *filters) + lzma_nothrow; + + +/** * \brief Initialize .lzma encoder (legacy file format) * * The .lzma format is sometimes called the LZMA_Alone format, which is the @@ -651,13 +678,13 @@ extern LZMA_API(lzma_ret) lzma_microlzma_encoder( * supported by liblzma, only the .xz and .lz formats allow concatenated * files. Concatenated files are not allowed with the legacy .lzma format. * - * This flag also affects the usage of the `action' argument for lzma_code(). + * This flag also affects the usage of the 'action' argument for lzma_code(). * When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END - * unless LZMA_FINISH is used as `action'. Thus, the application has to set + * unless LZMA_FINISH is used as 'action'. Thus, the application has to set * LZMA_FINISH in the same way as it does when encoding. * * If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH - * as `action' for lzma_code(), but the usage of LZMA_FINISH isn't required. + * as 'action' for lzma_code(), but the usage of LZMA_FINISH isn't required. */ #define LZMA_CONCATENATED UINT32_C(0x08) @@ -765,7 +792,7 @@ extern LZMA_API(lzma_ret) lzma_stream_decoder_mt( * as it doesn't support any decoder flags. It will return LZMA_STREAM_END * after one .lzma stream.) * - * \param strm Pointer to lzma_stream that is at least initialized + * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param memlimit Memory usage limit as bytes. Use UINT64_MAX * to effectively disable the limiter. liblzma @@ -791,7 +818,7 @@ extern LZMA_API(lzma_ret) lzma_auto_decoder( /** * \brief Initialize .lzma decoder (legacy file format) * - * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * Valid 'action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. * There is no need to use LZMA_FINISH, but it's allowed because it may * simplify certain types of applications. * diff --git a/contrib/libs/lzma/liblzma/api/lzma/delta.h b/contrib/libs/lzma/liblzma/api/lzma/delta.h index 7a725bc407..5ebacef815 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/delta.h +++ b/contrib/libs/lzma/liblzma/api/lzma/delta.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/delta.h * \brief Delta filter @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL diff --git a/contrib/libs/lzma/liblzma/api/lzma/filter.h b/contrib/libs/lzma/liblzma/api/lzma/filter.h index 1d887b4f2f..e86809c4e3 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/filter.h +++ b/contrib/libs/lzma/liblzma/api/lzma/filter.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/filter.h * \brief Common filter related types and functions @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL @@ -43,7 +42,7 @@ typedef struct { /** * \brief Filter ID * - * Use constants whose name begin with `LZMA_FILTER_' to specify + * Use constants whose name begin with 'LZMA_FILTER_' to specify * different filters. In an array of lzma_filter structures, use * LZMA_VLI_UNKNOWN to indicate end of filters. * @@ -199,7 +198,7 @@ extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters) * * This function may be useful when implementing custom file formats. * - * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the + * The 'action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the * filter chain supports it), or LZMA_FINISH. * * \param strm Pointer to lzma_stream that is at least @@ -223,7 +222,7 @@ extern LZMA_API(lzma_ret) lzma_raw_encoder( * * The initialization of raw decoder goes similarly to raw encoder. * - * The `action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using + * The 'action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using * LZMA_FINISH is not required, it is supported just for convenience. * * \param strm Pointer to lzma_stream that is at least diff --git a/contrib/libs/lzma/liblzma/api/lzma/hardware.h b/contrib/libs/lzma/liblzma/api/lzma/hardware.h index f34897d874..7a1a84fccc 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/hardware.h +++ b/contrib/libs/lzma/liblzma/api/lzma/hardware.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/hardware.h * \brief Hardware information @@ -23,9 +25,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL diff --git a/contrib/libs/lzma/liblzma/api/lzma/index.h b/contrib/libs/lzma/liblzma/api/lzma/index.h index 6eee4d6813..b17025e3d9 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/index.h +++ b/contrib/libs/lzma/liblzma/api/lzma/index.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/index.h * \brief Handling of .xz Index and related information @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL @@ -234,7 +233,7 @@ typedef struct { } block; /** - * \private Internal struct. + * \private Internal data * * Internal data which is used to store the state of the iterator. * The exact format may vary between liblzma versions, so don't @@ -302,6 +301,28 @@ typedef enum { /** + * \brief Mask for return value from lzma_index_checks() for check none + * + * \note This and the other CHECK_MASK macros were added in 5.5.1alpha. + */ +#define LZMA_INDEX_CHECK_MASK_NONE (UINT32_C(1) << LZMA_CHECK_NONE) + +/** + * \brief Mask for return value from lzma_index_checks() for check CRC32 + */ +#define LZMA_INDEX_CHECK_MASK_CRC32 (UINT32_C(1) << LZMA_CHECK_CRC32) + +/** + * \brief Mask for return value from lzma_index_checks() for check CRC64 + */ +#define LZMA_INDEX_CHECK_MASK_CRC64 (UINT32_C(1) << LZMA_CHECK_CRC64) + +/** + * \brief Mask for return value from lzma_index_checks() for check SHA256 + */ +#define LZMA_INDEX_CHECK_MASK_SHA256 (UINT32_C(1) << LZMA_CHECK_SHA256) + +/** * \brief Calculate memory usage of lzma_index * * On disk, the size of the Index field depends on both the number of Records @@ -431,6 +452,7 @@ extern LZMA_API(lzma_ret) lzma_index_stream_flags( * showing the Check types to the user. * * The bitmask is 1 << check_id, e.g. CRC32 is 1 << 1 and SHA-256 is 1 << 10. + * These masks are defined for convenience as LZMA_INDEX_CHECK_MASK_XXX * * \param i Pointer to lzma_index structure * @@ -651,7 +673,7 @@ extern LZMA_API(lzma_bool) lzma_index_iter_locate( * function succeeds, the memory allocated for src * is freed or moved to be part of dest, and all * iterators pointing to src will become invalid. -* \param allocator lzma_allocator for custom allocator functions. + * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * * \return Possible lzma_ret values: @@ -686,7 +708,7 @@ extern LZMA_API(lzma_index *) lzma_index_dup( * \param strm Pointer to properly prepared lzma_stream * \param i Pointer to lzma_index which should be encoded. * - * The valid `action' values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * The valid 'action' values for lzma_code() are LZMA_RUN and LZMA_FINISH. * It is enough to use only one of them (you can choose freely). * * \return Possible lzma_ret values: @@ -715,7 +737,7 @@ extern LZMA_API(lzma_ret) lzma_index_encoder( * don't allow 0 here and return LZMA_PROG_ERROR; * later versions treat 0 as if 1 had been specified. * - * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * Valid 'action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. * There is no need to use LZMA_FINISH, but it's allowed because it may * simplify certain types of applications. * @@ -771,7 +793,7 @@ extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i, * lzma_index is allowed to require. The value * pointed by this pointer is modified if and only * if LZMA_MEMLIMIT_ERROR is returned. - * \param allocator lzma_allocator for custom allocator functions. + * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_pos The next byte will be read from in[*in_pos]. @@ -819,10 +841,10 @@ extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i, * expect to see the same exact value for the same file if you change the * input buffer size or switch to a different liblzma version. * - * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * Valid 'action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. * You only need to use LZMA_RUN; LZMA_FINISH is only supported because it * might be convenient for some applications. If you use LZMA_FINISH and if - * lzma_code() asks the application to seek, remember to reset `action' back + * lzma_code() asks the application to seek, remember to reset 'action' back * to LZMA_RUN unless you hit the end of the file again. * * Possible return values from lzma_code(): diff --git a/contrib/libs/lzma/liblzma/api/lzma/index_hash.h b/contrib/libs/lzma/liblzma/api/lzma/index_hash.h index a2d4c4845b..68f9024eb3 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/index_hash.h +++ b/contrib/libs/lzma/liblzma/api/lzma/index_hash.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/index_hash.h * \brief Validate Index by using a hash function @@ -9,9 +11,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL diff --git a/contrib/libs/lzma/liblzma/api/lzma/lzma12.h b/contrib/libs/lzma/liblzma/api/lzma/lzma12.h index 8ef6ea5b50..05f5b66eb5 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/lzma12.h +++ b/contrib/libs/lzma/liblzma/api/lzma/lzma12.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/lzma12.h * \brief LZMA1 and LZMA2 filters @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL @@ -289,7 +288,7 @@ typedef struct { * \brief Number of literal context bits * * How many of the highest bits of the previous uncompressed - * eight-bit byte (also known as `literal') are taken into + * eight-bit byte (also known as 'literal') are taken into * account when predicting the bits of the next literal. * * E.g. in typical English text, an upper-case letter is diff --git a/contrib/libs/lzma/liblzma/api/lzma/stream_flags.h b/contrib/libs/lzma/liblzma/api/lzma/stream_flags.h index 7622a62120..a33fe46837 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/stream_flags.h +++ b/contrib/libs/lzma/liblzma/api/lzma/stream_flags.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/stream_flags.h * \brief .xz Stream Header and Stream Footer encoder and decoder @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL diff --git a/contrib/libs/lzma/liblzma/api/lzma/version.h b/contrib/libs/lzma/liblzma/api/lzma/version.h index 71bd8087d9..53526b992c 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/version.h +++ b/contrib/libs/lzma/liblzma/api/lzma/version.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/version.h * \brief Version number @@ -6,9 +8,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL @@ -20,10 +19,10 @@ #define LZMA_VERSION_MAJOR 5 /** \brief Minor version number of the liblzma release. */ -#define LZMA_VERSION_MINOR 4 +#define LZMA_VERSION_MINOR 6 /** \brief Patch version number of the liblzma release. */ -#define LZMA_VERSION_PATCH 6 +#define LZMA_VERSION_PATCH 2 /** * \brief Version stability marker diff --git a/contrib/libs/lzma/liblzma/api/lzma/vli.h b/contrib/libs/lzma/liblzma/api/lzma/vli.h index f9ad15500d..6b049021b9 100644 --- a/contrib/libs/lzma/liblzma/api/lzma/vli.h +++ b/contrib/libs/lzma/liblzma/api/lzma/vli.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: 0BSD */ + /** * \file lzma/vli.h * \brief Variable-length integer handling @@ -17,9 +19,6 @@ /* * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL @@ -68,9 +67,8 @@ typedef uint64_t lzma_vli; * This is useful to test that application has given acceptable values * for example in the uncompressed_size and compressed_size variables. * - * \return True if the integer is representable as VLI or if it - * indicates unknown value. False if the integer cannot be - * represented as VLI. + * \return True if the integer is representable as a VLI or if it + * indicates an unknown value. False otherwise. */ #define lzma_vli_is_valid(vli) \ ((vli) <= LZMA_VLI_MAX || (vli) == LZMA_VLI_UNKNOWN) diff --git a/contrib/libs/lzma/liblzma/check/check.c b/contrib/libs/lzma/liblzma/check/check.c index 428ddaeb77..7734ace185 100644 --- a/contrib/libs/lzma/liblzma/check/check.c +++ b/contrib/libs/lzma/liblzma/check/check.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file check.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "check.h" diff --git a/contrib/libs/lzma/liblzma/check/check.h b/contrib/libs/lzma/liblzma/check/check.h index 6b8cd546a8..08627e783a 100644 --- a/contrib/libs/lzma/liblzma/check/check.h +++ b/contrib/libs/lzma/liblzma/check/check.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file check.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_CHECK_H diff --git a/contrib/libs/lzma/liblzma/check/crc32_fast.c b/contrib/libs/lzma/liblzma/check/crc32_fast.c index eed7350582..fce1af6119 100644 --- a/contrib/libs/lzma/liblzma/check/crc32_fast.c +++ b/contrib/libs/lzma/liblzma/check/crc32_fast.c @@ -1,35 +1,40 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file crc32.c /// \brief CRC32 calculation -/// -/// Calculate the CRC32 using the slice-by-eight algorithm. -/// It is explained in this document: -/// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf -/// The code in this file is not the same as in Intel's paper, but -/// the basic principle is identical. -// -// Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. +// Authors: Lasse Collin +// Ilya Kurdyukov +// Hans Jansen // /////////////////////////////////////////////////////////////////////////////// #include "check.h" -#include "crc_macros.h" +#include "crc_common.h" + +#if defined(CRC_X86_CLMUL) +# define BUILDING_CRC32_CLMUL +# include "crc_x86_clmul.h" +#elif defined(CRC32_ARM64) +# error #include "crc32_arm64.h" +#endif -// If you make any changes, do some benchmarking! Seemingly unrelated -// changes can very easily ruin the performance (and very probably is -// very compiler dependent). -extern LZMA_API(uint32_t) -lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) +#ifdef CRC32_GENERIC + +/////////////////// +// Generic CRC32 // +/////////////////// + +static uint32_t +crc32_generic(const uint8_t *buf, size_t size, uint32_t crc) { crc = ~crc; #ifdef WORDS_BIGENDIAN - crc = bswap32(crc); + crc = byteswap32(crc); #endif if (size > 8) { @@ -75,8 +80,125 @@ lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc); #ifdef WORDS_BIGENDIAN - crc = bswap32(crc); + crc = byteswap32(crc); #endif return ~crc; } +#endif + + +#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) + +////////////////////////// +// Function dispatching // +////////////////////////// + +// If both the generic and arch-optimized implementations are built, then +// the function to use is selected at runtime because the system running +// the binary might not have the arch-specific instruction set extension(s) +// available. The dispatch methods in order of priority: +// +// 1. Constructor. This method uses __attribute__((__constructor__)) to +// set crc32_func at load time. This avoids extra computation (and any +// unlikely threading bugs) on the first call to lzma_crc32() to decide +// which implementation should be used. +// +// 2. First Call Resolution. On the very first call to lzma_crc32(), the +// call will be directed to crc32_dispatch() instead. This will set the +// appropriate implementation function and will not be called again. +// This method does not use any kind of locking but is safe because if +// multiple threads run the dispatcher simultaneously then they will all +// set crc32_func to the same value. + +typedef uint32_t (*crc32_func_type)( + const uint8_t *buf, size_t size, uint32_t crc); + +// This resolver is shared between all dispatch methods. +static crc32_func_type +crc32_resolve(void) +{ + return is_arch_extension_supported() + ? &crc32_arch_optimized : &crc32_generic; +} + + +#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR +// Constructor method. +# define CRC32_SET_FUNC_ATTR __attribute__((__constructor__)) +static crc32_func_type crc32_func; +#else +// First Call Resolution method. +# define CRC32_SET_FUNC_ATTR +static uint32_t crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc); +static crc32_func_type crc32_func = &crc32_dispatch; +#endif + +CRC32_SET_FUNC_ATTR +static void +crc32_set_func(void) +{ + crc32_func = crc32_resolve(); + return; +} + +#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR +static uint32_t +crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc) +{ + // When __attribute__((__constructor__)) isn't supported, set the + // function pointer without any locking. If multiple threads run + // the detection code in parallel, they will all end up setting + // the pointer to the same value. This avoids the use of + // mythread_once() on every call to lzma_crc32() but this likely + // isn't strictly standards compliant. Let's change it if it breaks. + crc32_set_func(); + return crc32_func(buf, size, crc); +} + +#endif +#endif + + +extern LZMA_API(uint32_t) +lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ +#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) + // On x86-64, if CLMUL is available, it is the best for non-tiny + // inputs, being over twice as fast as the generic slice-by-four + // version. However, for size <= 16 it's different. In the extreme + // case of size == 1 the generic version can be five times faster. + // At size >= 8 the CLMUL starts to become reasonable. It + // varies depending on the alignment of buf too. + // + // The above doesn't include the overhead of mythread_once(). + // At least on x86-64 GNU/Linux, pthread_once() is very fast but + // it still makes lzma_crc32(buf, 1, crc) 50-100 % slower. When + // size reaches 12-16 bytes the overhead becomes negligible. + // + // So using the generic version for size <= 16 may give better + // performance with tiny inputs but if such inputs happen rarely + // it's not so obvious because then the lookup table of the + // generic version may not be in the processor cache. +#ifdef CRC_USE_GENERIC_FOR_SMALL_INPUTS + if (size <= 16) + return crc32_generic(buf, size, crc); +#endif + +/* +#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR + // See crc32_dispatch(). This would be the alternative which uses + // locking and doesn't use crc32_dispatch(). Note that on Windows + // this method needs Vista threads. + mythread_once(crc64_set_func); +#endif +*/ + return crc32_func(buf, size, crc); + +#elif defined(CRC32_ARCH_OPTIMIZED) + return crc32_arch_optimized(buf, size, crc); + +#else + return crc32_generic(buf, size, crc); +#endif +} diff --git a/contrib/libs/lzma/liblzma/check/crc32_table.c b/contrib/libs/lzma/liblzma/check/crc32_table.c index e9c1928cf9..7778e79e92 100644 --- a/contrib/libs/lzma/liblzma/check/crc32_table.c +++ b/contrib/libs/lzma/liblzma/check/crc32_table.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file crc32_table.c @@ -5,18 +7,36 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" + +// FIXME: Compared to crc_common.h this has to check for __x86_64__ too +// so that in 32-bit builds crc32_x86.S won't break due to a missing table. +#if defined(HAVE_USABLE_CLMUL) && ((defined(__x86_64__) && defined(__SSSE3__) \ + && defined(__SSE4_1__) && defined(__PCLMUL__)) \ + || (defined(__e2k__) && __iset__ >= 6)) +# define NO_CRC32_TABLE + +#elif defined(HAVE_ARM64_CRC32) \ + && !defined(WORDS_BIGENDIAN) \ + && defined(__ARM_FEATURE_CRC32) +# define NO_CRC32_TABLE +#endif + + +#if !defined(HAVE_ENCODERS) && defined(NO_CRC32_TABLE) +// No table needed. Use a typedef to avoid an empty translation unit. +typedef void lzma_crc32_dummy; + +#else // Having the declaration here silences clang -Wmissing-variable-declarations. extern const uint32_t lzma_crc32_table[8][256]; -#ifdef WORDS_BIGENDIAN -# error #include "crc32_table_be.h" -#else -# include "crc32_table_le.h" +# ifdef WORDS_BIGENDIAN +# error #include "crc32_table_be.h" +# else +# include "crc32_table_le.h" +# endif #endif diff --git a/contrib/libs/lzma/liblzma/check/crc32_table_le.h b/contrib/libs/lzma/liblzma/check/crc32_table_le.h index 25f4fc4435..e89c21a7b2 100644 --- a/contrib/libs/lzma/liblzma/check/crc32_table_le.h +++ b/contrib/libs/lzma/liblzma/check/crc32_table_le.h @@ -1,4 +1,6 @@ -/* This file has been automatically generated by crc32_tablegen.c. */ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by crc32_tablegen.c. const uint32_t lzma_crc32_table[8][256] = { { diff --git a/contrib/libs/lzma/liblzma/check/crc64_fast.c b/contrib/libs/lzma/liblzma/check/crc64_fast.c index 0c8622a1f3..0ce83fe4ad 100644 --- a/contrib/libs/lzma/liblzma/check/crc64_fast.c +++ b/contrib/libs/lzma/liblzma/check/crc64_fast.c @@ -1,85 +1,30 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file crc64.c /// \brief CRC64 calculation -/// -/// There are two methods in this file. crc64_generic uses the -/// the slice-by-four algorithm. This is the same idea that is -/// used in crc32_fast.c, but for CRC64 we use only four tables -/// instead of eight to avoid increasing CPU cache usage. -/// -/// crc64_clmul uses 32/64-bit x86 SSSE3, SSE4.1, and CLMUL instructions. -/// It was derived from -/// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -/// and the public domain code from https://github.com/rawrunprotected/crc -/// (URLs were checked on 2022-11-07). -/// -/// FIXME: Builds for 32-bit x86 use crc64_x86.S by default instead -/// of this file and thus CLMUL version isn't available on 32-bit x86 -/// unless configured with --disable-assembler. Even then the lookup table -/// isn't omitted in crc64_table.c since it doesn't know that assembly -/// code has been disabled. // // Authors: Lasse Collin // Ilya Kurdyukov // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "check.h" +#include "crc_common.h" -#undef CRC_GENERIC -#undef CRC_CLMUL -#undef CRC_USE_GENERIC_FOR_SMALL_INPUTS - -// If CLMUL cannot be used then only the generic slice-by-four is built. -#if !defined(HAVE_USABLE_CLMUL) -# define CRC_GENERIC 1 - -// If CLMUL is allowed unconditionally in the compiler options then the -// generic version can be omitted. Note that this doesn't work with MSVC -// as I don't know how to detect the features here. -// -// NOTE: Keep this this in sync with crc64_table.c. -#elif (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \ - || (defined(__e2k__) && __iset__ >= 6) -# define CRC_CLMUL 1 - -// Otherwise build both and detect at runtime which version to use. -#else -# define CRC_GENERIC 1 -# define CRC_CLMUL 1 - -/* - // The generic code is much faster with 1-8-byte inputs and has - // similar performance up to 16 bytes at least in microbenchmarks - // (it depends on input buffer alignment too). If both versions are - // built, this #define will use the generic version for inputs up to - // 16 bytes and CLMUL for bigger inputs. It saves a little in code - // size since the special cases for 0-16-byte inputs will be omitted - // from the CLMUL code. -# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1 -*/ - -# if defined(_MSC_VER) -# include <intrin.h> -# elif defined(HAVE_CPUID_H) -# include <cpuid.h> -# endif +#if defined(CRC_X86_CLMUL) +# define BUILDING_CRC64_CLMUL +# include "crc_x86_clmul.h" #endif +#ifdef CRC64_GENERIC + ///////////////////////////////// // Generic slice-by-four CRC64 // ///////////////////////////////// -#ifdef CRC_GENERIC - -#include "crc_macros.h" - - #ifdef WORDS_BIGENDIAN # define A1(x) ((x) >> 56) #else @@ -94,7 +39,7 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc) crc = ~crc; #ifdef WORDS_BIGENDIAN - crc = bswap64(crc); + crc = byteswap64(crc); #endif if (size > 4) { @@ -128,7 +73,7 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc) crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc); #ifdef WORDS_BIGENDIAN - crc = bswap64(crc); + crc = byteswap64(crc); #endif return ~crc; @@ -136,336 +81,40 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc) #endif -///////////////////// -// x86 CLMUL CRC64 // -///////////////////// - -#ifdef CRC_CLMUL - -#include <immintrin.h> - - -/* -// These functions were used to generate the constants -// at the top of crc64_clmul(). -static uint64_t -calc_lo(uint64_t poly) -{ - uint64_t a = poly; - uint64_t b = 0; - - for (unsigned i = 0; i < 64; ++i) { - b = (b >> 1) | (a << 63); - a = (a >> 1) ^ (a & 1 ? poly : 0); - } - - return b; -} - -static uint64_t -calc_hi(uint64_t poly, uint64_t a) -{ - for (unsigned i = 0; i < 64; ++i) - a = (a >> 1) ^ (a & 1 ? poly : 0); - - return a; -} -*/ - - -#define MASK_L(in, mask, r) \ - r = _mm_shuffle_epi8(in, mask) - -#define MASK_H(in, mask, r) \ - r = _mm_shuffle_epi8(in, _mm_xor_si128(mask, vsign)) - -#define MASK_LH(in, mask, low, high) \ - MASK_L(in, mask, low); \ - MASK_H(in, mask, high) - - -// MSVC (VS2015 - VS2022) produces bad 32-bit x86 code from the CLMUL CRC -// code when optimizations are enabled (release build). According to the bug -// report, the ebx register is corrupted and the calculated result is wrong. -// Trying to workaround the problem with "__asm mov ebx, ebx" didn't help. -// The following pragma works and performance is still good. x86-64 builds -// aren't affected by this problem. -// -// NOTE: Another pragma after the function restores the optimizations. -// If the #if condition here is updated, the other one must be updated too. -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \ - && defined(_M_IX86) -# pragma optimize("g", off) -#endif - -// EDG-based compilers (Intel's classic compiler and compiler for E2K) can -// define __GNUC__ but the attribute must not be used with them. -// The new Clang-based ICX needs the attribute. -// -// NOTE: Build systems check for this too, keep them in sync with this. -#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__) -__attribute__((__target__("ssse3,sse4.1,pclmul"))) -#endif -// The intrinsics use 16-byte-aligned reads from buf, thus they may read -// up to 15 bytes before or after the buffer (depending on the alignment -// of the buf argument). The values of the extra bytes are ignored. -// This unavoidably trips -fsanitize=address so address sanitizier has -// to be disabled for this function. -#if lzma_has_attribute(__no_sanitize_address__) -__attribute__((__no_sanitize_address__)) -#endif -static uint64_t -crc64_clmul(const uint8_t *buf, size_t size, uint64_t crc) -{ - // The prototypes of the intrinsics use signed types while most of - // the values are treated as unsigned here. These warnings in this - // function have been checked and found to be harmless so silence them. -#if TUKLIB_GNUC_REQ(4, 6) || defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wsign-conversion" -# pragma GCC diagnostic ignored "-Wconversion" -#endif - -#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS - // The code assumes that there is at least one byte of input. - if (size == 0) - return crc; -#endif - - // const uint64_t poly = 0xc96c5795d7870f42; // CRC polynomial - const uint64_t p = 0x92d8af2baf0e1e85; // (poly << 1) | 1 - const uint64_t mu = 0x9c3e466c172963d5; // (calc_lo(poly) << 1) | 1 - const uint64_t k2 = 0xdabe95afc7875f40; // calc_hi(poly, 1) - const uint64_t k1 = 0xe05dd497ca393ae4; // calc_hi(poly, k2) - const __m128i vfold0 = _mm_set_epi64x(p, mu); - const __m128i vfold1 = _mm_set_epi64x(k2, k1); - - // Create a vector with 8-bit values 0 to 15. This is used to - // construct control masks for _mm_blendv_epi8 and _mm_shuffle_epi8. - const __m128i vramp = _mm_setr_epi32( - 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c); - - // This is used to inverse the control mask of _mm_shuffle_epi8 - // so that bytes that wouldn't be picked with the original mask - // will be picked and vice versa. - const __m128i vsign = _mm_set1_epi8(0x80); - - // Memory addresses A to D and the distances between them: - // - // A B C D - // [skip_start][size][skip_end] - // [ size2 ] - // - // A and D are 16-byte aligned. B and C are 1-byte aligned. - // skip_start and skip_end are 0-15 bytes. size is at least 1 byte. - // - // A = aligned_buf will initially point to this address. - // B = The address pointed by the caller-supplied buf. - // C = buf + size == aligned_buf + size2 - // D = buf + size + skip_end == aligned_buf + size2 + skip_end - const size_t skip_start = (size_t)((uintptr_t)buf & 15); - const size_t skip_end = (size_t)((0U - (uintptr_t)(buf + size)) & 15); - const __m128i *aligned_buf = (const __m128i *)( - (uintptr_t)buf & ~(uintptr_t)15); - - // If size2 <= 16 then the whole input fits into a single 16-byte - // vector. If size2 > 16 then at least two 16-byte vectors must - // be processed. If size2 > 16 && size <= 16 then there is only - // one 16-byte vector's worth of input but it is unaligned in memory. - // - // NOTE: There is no integer overflow here if the arguments are valid. - // If this overflowed, buf + size would too. - size_t size2 = skip_start + size; - - // Masks to be used with _mm_blendv_epi8 and _mm_shuffle_epi8: - // The first skip_start or skip_end bytes in the vectors will have - // the high bit (0x80) set. _mm_blendv_epi8 and _mm_shuffle_epi8 - // will produce zeros for these positions. (Bitwise-xor of these - // masks with vsign will produce the opposite behavior.) - const __m128i mask_start - = _mm_sub_epi8(vramp, _mm_set1_epi8(skip_start)); - const __m128i mask_end = _mm_sub_epi8(vramp, _mm_set1_epi8(skip_end)); - - // Get the first 1-16 bytes into data0. If loading less than 16 bytes, - // the bytes are loaded to the high bits of the vector and the least - // significant positions are filled with zeros. - const __m128i data0 = _mm_blendv_epi8(_mm_load_si128(aligned_buf), - _mm_setzero_si128(), mask_start); - ++aligned_buf; - -#if defined(__i386__) || defined(_M_IX86) - const __m128i initial_crc = _mm_set_epi64x(0, ~crc); -#else - // GCC and Clang would produce good code with _mm_set_epi64x - // but MSVC needs _mm_cvtsi64_si128 on x86-64. - const __m128i initial_crc = _mm_cvtsi64_si128(~crc); -#endif +#if defined(CRC64_GENERIC) && defined(CRC64_ARCH_OPTIMIZED) - __m128i v0, v1, v2, v3; - -#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS - if (size <= 16) { - // Right-shift initial_crc by 1-16 bytes based on "size" - // and store the result in v1 (high bytes) and v0 (low bytes). - // - // NOTE: The highest 8 bytes of initial_crc are zeros so - // v1 will be filled with zeros if size >= 8. The highest 8 - // bytes of v1 will always become zeros. - // - // [ v1 ][ v0 ] - // [ initial_crc ] size == 1 - // [ initial_crc ] size == 2 - // [ initial_crc ] size == 15 - // [ initial_crc ] size == 16 (all in v0) - const __m128i mask_low = _mm_add_epi8( - vramp, _mm_set1_epi8(size - 16)); - MASK_LH(initial_crc, mask_low, v0, v1); - - if (size2 <= 16) { - // There are 1-16 bytes of input and it is all - // in data0. Copy the input bytes to v3. If there - // are fewer than 16 bytes, the low bytes in v3 - // will be filled with zeros. That is, the input - // bytes are stored to the same position as - // (part of) initial_crc is in v0. - MASK_L(data0, mask_end, v3); - } else { - // There are 2-16 bytes of input but not all bytes - // are in data0. - const __m128i data1 = _mm_load_si128(aligned_buf); - - // Collect the 2-16 input bytes from data0 and data1 - // to v2 and v3, and bitwise-xor them with the - // low bits of initial_crc in v0. Note that the - // the second xor is below this else-block as it - // is shared with the other branch. - MASK_H(data0, mask_end, v2); - MASK_L(data1, mask_end, v3); - v0 = _mm_xor_si128(v0, v2); - } +////////////////////////// +// Function dispatching // +////////////////////////// - v0 = _mm_xor_si128(v0, v3); - v1 = _mm_alignr_epi8(v1, v0, 8); - } else -#endif - { - const __m128i data1 = _mm_load_si128(aligned_buf); - MASK_LH(initial_crc, mask_start, v0, v1); - v0 = _mm_xor_si128(v0, data0); - v1 = _mm_xor_si128(v1, data1); - -#define FOLD \ - v1 = _mm_xor_si128(v1, _mm_clmulepi64_si128(v0, vfold1, 0x00)); \ - v0 = _mm_xor_si128(v1, _mm_clmulepi64_si128(v0, vfold1, 0x11)); - - while (size2 > 32) { - ++aligned_buf; - size2 -= 16; - FOLD - v1 = _mm_load_si128(aligned_buf); - } - - if (size2 < 32) { - MASK_H(v0, mask_end, v2); - MASK_L(v0, mask_end, v0); - MASK_L(v1, mask_end, v3); - v1 = _mm_or_si128(v2, v3); - } - - FOLD - v1 = _mm_srli_si128(v0, 8); -#undef FOLD - } +// If both the generic and arch-optimized implementations are usable, then +// the function that is used is selected at runtime. See crc32_fast.c. - v1 = _mm_xor_si128(_mm_clmulepi64_si128(v0, vfold1, 0x10), v1); - v0 = _mm_clmulepi64_si128(v1, vfold0, 0x00); - v2 = _mm_clmulepi64_si128(v0, vfold0, 0x10); - v0 = _mm_xor_si128(_mm_xor_si128(v2, _mm_slli_si128(v0, 8)), v1); +typedef uint64_t (*crc64_func_type)( + const uint8_t *buf, size_t size, uint64_t crc); -#if defined(__i386__) || defined(_M_IX86) - return ~(((uint64_t)(uint32_t)_mm_extract_epi32(v0, 3) << 32) | - (uint64_t)(uint32_t)_mm_extract_epi32(v0, 2)); -#else - return ~(uint64_t)_mm_extract_epi64(v0, 1); -#endif - -#if TUKLIB_GNUC_REQ(4, 6) || defined(__clang__) -# pragma GCC diagnostic pop -#endif -} -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \ - && defined(_M_IX86) -# pragma optimize("", on) -#endif -#endif - - -//////////////////////// -// Detect CPU support // -//////////////////////// - -#if defined(CRC_GENERIC) && defined(CRC_CLMUL) -static inline bool -is_clmul_supported(void) +static crc64_func_type +crc64_resolve(void) { - int success = 1; - uint32_t r[4]; // eax, ebx, ecx, edx - -#if defined(_MSC_VER) - // This needs <intrin.h> with MSVC. ICC has it as a built-in - // on all platforms. - __cpuid(r, 1); -#elif defined(HAVE_CPUID_H) - // Compared to just using __asm__ to run CPUID, this also checks - // that CPUID is supported and saves and restores ebx as that is - // needed with GCC < 5 with position-independent code (PIC). - success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]); -#else - // Just a fallback that shouldn't be needed. - __asm__("cpuid\n\t" - : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3]) - : "a"(1), "c"(0)); -#endif - - // Returns true if these are supported: - // CLMUL (bit 1 in ecx) - // SSSE3 (bit 9 in ecx) - // SSE4.1 (bit 19 in ecx) - const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19); - return success && (r[2] & ecx_mask) == ecx_mask; - - // Alternative methods that weren't used: - // - ICC's _may_i_use_cpu_feature: the other methods should work too. - // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul") - // - // CPUID decding is needed with MSVC anyway and older GCC. This keeps - // the feature checks in the build system simpler too. The nice thing - // about __builtin_cpu_supports would be that it generates very short - // code as is it only reads a variable set at startup but a few bytes - // doesn't matter here. + return is_arch_extension_supported() + ? &crc64_arch_optimized : &crc64_generic; } - #ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR -# define CRC64_FUNC_INIT # define CRC64_SET_FUNC_ATTR __attribute__((__constructor__)) +static crc64_func_type crc64_func; #else -# define CRC64_FUNC_INIT = &crc64_dispatch # define CRC64_SET_FUNC_ATTR static uint64_t crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc); +static crc64_func_type crc64_func = &crc64_dispatch; #endif -// Pointer to the the selected CRC64 method. -static uint64_t (*crc64_func)(const uint8_t *buf, size_t size, uint64_t crc) - CRC64_FUNC_INIT; - - CRC64_SET_FUNC_ATTR static void crc64_set_func(void) { - crc64_func = is_clmul_supported() ? &crc64_clmul : &crc64_generic; + crc64_func = crc64_resolve(); return; } @@ -474,12 +123,6 @@ crc64_set_func(void) static uint64_t crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc) { - // When __attribute__((__constructor__)) isn't supported, set the - // function pointer without any locking. If multiple threads run - // the detection code in parallel, they will all end up setting - // the pointer to the same value. This avoids the use of - // mythread_once() on every call to lzma_crc64() but this likely - // isn't strictly standards compliant. Let's change it if it breaks. crc64_set_func(); return crc64_func(buf, size, crc); } @@ -490,47 +133,22 @@ crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc) extern LZMA_API(uint64_t) lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) { -#if defined(CRC_GENERIC) && defined(CRC_CLMUL) - // If CLMUL is available, it is the best for non-tiny inputs, - // being over twice as fast as the generic slice-by-four version. - // However, for size <= 16 it's different. In the extreme case - // of size == 1 the generic version can be five times faster. - // At size >= 8 the CLMUL starts to become reasonable. It - // varies depending on the alignment of buf too. - // - // The above doesn't include the overhead of mythread_once(). - // At least on x86-64 GNU/Linux, pthread_once() is very fast but - // it still makes lzma_crc64(buf, 1, crc) 50-100 % slower. When - // size reaches 12-16 bytes the overhead becomes negligible. - // - // So using the generic version for size <= 16 may give better - // performance with tiny inputs but if such inputs happen rarely - // it's not so obvious because then the lookup table of the - // generic version may not be in the processor cache. +#if defined(CRC64_GENERIC) && defined(CRC64_ARCH_OPTIMIZED) + #ifdef CRC_USE_GENERIC_FOR_SMALL_INPUTS if (size <= 16) return crc64_generic(buf, size, crc); #endif - -/* -#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR - // See crc64_dispatch(). This would be the alternative which uses - // locking and doesn't use crc64_dispatch(). Note that on Windows - // this method needs Vista threads. - mythread_once(crc64_set_func); -#endif -*/ - return crc64_func(buf, size, crc); -#elif defined(CRC_CLMUL) - // If CLMUL is used unconditionally without runtime CPU detection - // then omitting the generic version and its 8 KiB lookup table - // makes the library smaller. +#elif defined(CRC64_ARCH_OPTIMIZED) + // If arch-optimized version is used unconditionally without runtime + // CPU detection then omitting the generic version and its 8 KiB + // lookup table makes the library smaller. // // FIXME: Lookup table isn't currently omitted on 32-bit x86, // see crc64_table.c. - return crc64_clmul(buf, size, crc); + return crc64_arch_optimized(buf, size, crc); #else return crc64_generic(buf, size, crc); diff --git a/contrib/libs/lzma/liblzma/check/crc64_table.c b/contrib/libs/lzma/liblzma/check/crc64_table.c index bac9301aa4..e7ceb0276f 100644 --- a/contrib/libs/lzma/liblzma/check/crc64_table.c +++ b/contrib/libs/lzma/liblzma/check/crc64_table.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file crc64_table.c @@ -5,19 +7,21 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" -// FIXME: Compared to crc64_fast.c this has to check for __x86_64__ too +// FIXME: Compared to crc_common.h this has to check for __x86_64__ too // so that in 32-bit builds crc64_x86.S won't break due to a missing table. -#if (defined(__x86_64__) && defined(__SSSE3__) \ +#if defined(HAVE_USABLE_CLMUL) && ((defined(__x86_64__) && defined(__SSSE3__) \ && defined(__SSE4_1__) && defined(__PCLMUL__)) \ - || (defined(__e2k__) && __iset__ >= 6) + || (defined(__e2k__) && __iset__ >= 6)) +# define NO_CRC64_TABLE +#endif + + +#ifdef NO_CRC64_TABLE // No table needed. Use a typedef to avoid an empty translation unit. typedef void lzma_crc64_dummy; diff --git a/contrib/libs/lzma/liblzma/check/crc64_table_le.h b/contrib/libs/lzma/liblzma/check/crc64_table_le.h index 1196b31e13..e40a8c8210 100644 --- a/contrib/libs/lzma/liblzma/check/crc64_table_le.h +++ b/contrib/libs/lzma/liblzma/check/crc64_table_le.h @@ -1,4 +1,6 @@ -/* This file has been automatically generated by crc64_tablegen.c. */ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by crc64_tablegen.c. const uint64_t lzma_crc64_table[4][256] = { { diff --git a/contrib/libs/lzma/liblzma/check/crc_common.h b/contrib/libs/lzma/liblzma/check/crc_common.h new file mode 100644 index 0000000000..63a7b5cefe --- /dev/null +++ b/contrib/libs/lzma/liblzma/check/crc_common.h @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc_common.h +/// \brief Some functions and macros for CRC32 and CRC64 +// +// Authors: Lasse Collin +// Ilya Kurdyukov +// Hans Jansen +// Jia Tan +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_CRC_COMMON_H +#define LZMA_CRC_COMMON_H + +#include "common.h" + + +#ifdef WORDS_BIGENDIAN +# define A(x) ((x) >> 24) +# define B(x) (((x) >> 16) & 0xFF) +# define C(x) (((x) >> 8) & 0xFF) +# define D(x) ((x) & 0xFF) + +# define S8(x) ((x) << 8) +# define S32(x) ((x) << 32) + +#else +# define A(x) ((x) & 0xFF) +# define B(x) (((x) >> 8) & 0xFF) +# define C(x) (((x) >> 16) & 0xFF) +# define D(x) ((x) >> 24) + +# define S8(x) ((x) >> 8) +# define S32(x) ((x) >> 32) +#endif + + +// CRC CLMUL code needs this because accessing input buffers that aren't +// aligned to the vector size will inherently trip the address sanitizer. +#if lzma_has_attribute(__no_sanitize_address__) +# define crc_attr_no_sanitize_address \ + __attribute__((__no_sanitize_address__)) +#else +# define crc_attr_no_sanitize_address +#endif + +// Keep this in sync with changes to crc32_arm64.h +#if defined(_WIN32) || defined(HAVE_GETAUXVAL) \ + || defined(HAVE_ELF_AUX_INFO) \ + || (defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)) +# define ARM64_RUNTIME_DETECTION 1 +#endif + + +#undef CRC32_GENERIC +#undef CRC64_GENERIC + +#undef CRC32_ARCH_OPTIMIZED +#undef CRC64_ARCH_OPTIMIZED + +// The x86 CLMUL is used for both CRC32 and CRC64. +#undef CRC_X86_CLMUL + +#undef CRC32_ARM64 +#undef CRC64_ARM64_CLMUL + +#undef CRC_USE_GENERIC_FOR_SMALL_INPUTS + +// ARM64 CRC32 instruction is only useful for CRC32. Currently, only +// little endian is supported since we were unable to test on a big +// endian machine. +// +// NOTE: Keep this and the next check in sync with the macro +// NO_CRC32_TABLE in crc32_table.c +#if defined(HAVE_ARM64_CRC32) && !defined(WORDS_BIGENDIAN) +// Allow ARM64 CRC32 instruction without a runtime check if +// __ARM_FEATURE_CRC32 is defined. GCC and Clang only define this if the +// proper compiler options are used. +# if defined(__ARM_FEATURE_CRC32) +# define CRC32_ARCH_OPTIMIZED 1 +# define CRC32_ARM64 1 +# elif defined(ARM64_RUNTIME_DETECTION) +# define CRC32_ARCH_OPTIMIZED 1 +# define CRC32_ARM64 1 +# define CRC32_GENERIC 1 +# endif +#endif + +#if defined(HAVE_USABLE_CLMUL) +// If CLMUL is allowed unconditionally in the compiler options then the +// generic version can be omitted. Note that this doesn't work with MSVC +// as I don't know how to detect the features here. +// +// NOTE: Keep this in sync with the NO_CRC32_TABLE macro in crc32_table.c +// and NO_CRC64_TABLE in crc64_table.c. +# if (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \ + || (defined(__e2k__) && __iset__ >= 6) +# define CRC32_ARCH_OPTIMIZED 1 +# define CRC64_ARCH_OPTIMIZED 1 +# define CRC_X86_CLMUL 1 +# else +# define CRC32_GENERIC 1 +# define CRC64_GENERIC 1 +# define CRC32_ARCH_OPTIMIZED 1 +# define CRC64_ARCH_OPTIMIZED 1 +# define CRC_X86_CLMUL 1 + +/* + // The generic code is much faster with 1-8-byte inputs and + // has similar performance up to 16 bytes at least in + // microbenchmarks (it depends on input buffer alignment + // too). If both versions are built, this #define will use + // the generic version for inputs up to 16 bytes and CLMUL + // for bigger inputs. It saves a little in code size since + // the special cases for 0-16-byte inputs will be omitted + // from the CLMUL code. +# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1 +*/ +# endif +#endif + +// For CRC32 use the generic slice-by-eight implementation if no optimized +// version is available. +#if !defined(CRC32_ARCH_OPTIMIZED) && !defined(CRC32_GENERIC) +# define CRC32_GENERIC 1 +#endif + +// For CRC64 use the generic slice-by-four implementation if no optimized +// version is available. +#if !defined(CRC64_ARCH_OPTIMIZED) && !defined(CRC64_GENERIC) +# define CRC64_GENERIC 1 +#endif + +#endif diff --git a/contrib/libs/lzma/liblzma/check/crc_macros.h b/contrib/libs/lzma/liblzma/check/crc_macros.h deleted file mode 100644 index a7c21b765d..0000000000 --- a/contrib/libs/lzma/liblzma/check/crc_macros.h +++ /dev/null @@ -1,30 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file crc_macros.h -/// \brief Some endian-dependent macros for CRC32 and CRC64 -// -// Author: Lasse Collin -// -// This file has been put into the public domain. -// You can do whatever you want with this file. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifdef WORDS_BIGENDIAN -# define A(x) ((x) >> 24) -# define B(x) (((x) >> 16) & 0xFF) -# define C(x) (((x) >> 8) & 0xFF) -# define D(x) ((x) & 0xFF) - -# define S8(x) ((x) << 8) -# define S32(x) ((x) << 32) - -#else -# define A(x) ((x) & 0xFF) -# define B(x) (((x) >> 8) & 0xFF) -# define C(x) (((x) >> 16) & 0xFF) -# define D(x) ((x) >> 24) - -# define S8(x) ((x) >> 8) -# define S32(x) ((x) >> 32) -#endif diff --git a/contrib/libs/lzma/liblzma/check/crc_x86_clmul.h b/contrib/libs/lzma/liblzma/check/crc_x86_clmul.h new file mode 100644 index 0000000000..f1254ece18 --- /dev/null +++ b/contrib/libs/lzma/liblzma/check/crc_x86_clmul.h @@ -0,0 +1,428 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc_x86_clmul.h +/// \brief CRC32 and CRC64 implementations using CLMUL instructions. +/// +/// The CRC32 and CRC64 implementations use 32/64-bit x86 SSSE3, SSE4.1, and +/// CLMUL instructions. This is compatible with Elbrus 2000 (E2K) too. +/// +/// They were derived from +/// https://www.researchgate.net/publication/263424619_Fast_CRC_computation +/// and the public domain code from https://github.com/rawrunprotected/crc +/// (URLs were checked on 2023-10-14). +/// +/// While this file has both CRC32 and CRC64 implementations, only one +/// should be built at a time to ensure that crc_simd_body() is inlined +/// even with compilers with which lzma_always_inline expands to plain inline. +/// The version to build is selected by defining BUILDING_CRC32_CLMUL or +/// BUILDING_CRC64_CLMUL before including this file. +/// +/// FIXME: Builds for 32-bit x86 use the assembly .S files by default +/// unless configured with --disable-assembler. Even then the lookup table +/// isn't omitted in crc64_table.c since it doesn't know that assembly +/// code has been disabled. +// +// Authors: Ilya Kurdyukov +// Hans Jansen +// Lasse Collin +// Jia Tan +// +/////////////////////////////////////////////////////////////////////////////// + +// This file must not be included more than once. +#ifdef LZMA_CRC_X86_CLMUL_H +# error crc_x86_clmul.h was included twice. +#endif +#define LZMA_CRC_X86_CLMUL_H + +#include <immintrin.h> + +#if defined(_MSC_VER) +# include <intrin.h> +#elif defined(HAVE_CPUID_H) +# include <cpuid.h> +#endif + + +// EDG-based compilers (Intel's classic compiler and compiler for E2K) can +// define __GNUC__ but the attribute must not be used with them. +// The new Clang-based ICX needs the attribute. +// +// NOTE: Build systems check for this too, keep them in sync with this. +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__) +# define crc_attr_target \ + __attribute__((__target__("ssse3,sse4.1,pclmul"))) +#else +# define crc_attr_target +#endif + + +#define MASK_L(in, mask, r) r = _mm_shuffle_epi8(in, mask) + +#define MASK_H(in, mask, r) \ + r = _mm_shuffle_epi8(in, _mm_xor_si128(mask, vsign)) + +#define MASK_LH(in, mask, low, high) \ + MASK_L(in, mask, low); \ + MASK_H(in, mask, high) + + +crc_attr_target +crc_attr_no_sanitize_address +static lzma_always_inline void +crc_simd_body(const uint8_t *buf, const size_t size, __m128i *v0, __m128i *v1, + const __m128i vfold16, const __m128i initial_crc) +{ + // Create a vector with 8-bit values 0 to 15. This is used to + // construct control masks for _mm_blendv_epi8 and _mm_shuffle_epi8. + const __m128i vramp = _mm_setr_epi32( + 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c); + + // This is used to inverse the control mask of _mm_shuffle_epi8 + // so that bytes that wouldn't be picked with the original mask + // will be picked and vice versa. + const __m128i vsign = _mm_set1_epi8(-0x80); + + // Memory addresses A to D and the distances between them: + // + // A B C D + // [skip_start][size][skip_end] + // [ size2 ] + // + // A and D are 16-byte aligned. B and C are 1-byte aligned. + // skip_start and skip_end are 0-15 bytes. size is at least 1 byte. + // + // A = aligned_buf will initially point to this address. + // B = The address pointed by the caller-supplied buf. + // C = buf + size == aligned_buf + size2 + // D = buf + size + skip_end == aligned_buf + size2 + skip_end + const size_t skip_start = (size_t)((uintptr_t)buf & 15); + const size_t skip_end = (size_t)((0U - (uintptr_t)(buf + size)) & 15); + const __m128i *aligned_buf = (const __m128i *)( + (uintptr_t)buf & ~(uintptr_t)15); + + // If size2 <= 16 then the whole input fits into a single 16-byte + // vector. If size2 > 16 then at least two 16-byte vectors must + // be processed. If size2 > 16 && size <= 16 then there is only + // one 16-byte vector's worth of input but it is unaligned in memory. + // + // NOTE: There is no integer overflow here if the arguments + // are valid. If this overflowed, buf + size would too. + const size_t size2 = skip_start + size; + + // Masks to be used with _mm_blendv_epi8 and _mm_shuffle_epi8: + // The first skip_start or skip_end bytes in the vectors will have + // the high bit (0x80) set. _mm_blendv_epi8 and _mm_shuffle_epi8 + // will produce zeros for these positions. (Bitwise-xor of these + // masks with vsign will produce the opposite behavior.) + const __m128i mask_start + = _mm_sub_epi8(vramp, _mm_set1_epi8((char)skip_start)); + const __m128i mask_end + = _mm_sub_epi8(vramp, _mm_set1_epi8((char)skip_end)); + + // Get the first 1-16 bytes into data0. If loading less than 16 + // bytes, the bytes are loaded to the high bits of the vector and + // the least significant positions are filled with zeros. + const __m128i data0 = _mm_blendv_epi8(_mm_load_si128(aligned_buf), + _mm_setzero_si128(), mask_start); + aligned_buf++; + + __m128i v2, v3; + +#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS + if (size <= 16) { + // Right-shift initial_crc by 1-16 bytes based on "size" + // and store the result in v1 (high bytes) and v0 (low bytes). + // + // NOTE: The highest 8 bytes of initial_crc are zeros so + // v1 will be filled with zeros if size >= 8. The highest + // 8 bytes of v1 will always become zeros. + // + // [ v1 ][ v0 ] + // [ initial_crc ] size == 1 + // [ initial_crc ] size == 2 + // [ initial_crc ] size == 15 + // [ initial_crc ] size == 16 (all in v0) + const __m128i mask_low = _mm_add_epi8( + vramp, _mm_set1_epi8((char)(size - 16))); + MASK_LH(initial_crc, mask_low, *v0, *v1); + + if (size2 <= 16) { + // There are 1-16 bytes of input and it is all + // in data0. Copy the input bytes to v3. If there + // are fewer than 16 bytes, the low bytes in v3 + // will be filled with zeros. That is, the input + // bytes are stored to the same position as + // (part of) initial_crc is in v0. + MASK_L(data0, mask_end, v3); + } else { + // There are 2-16 bytes of input but not all bytes + // are in data0. + const __m128i data1 = _mm_load_si128(aligned_buf); + + // Collect the 2-16 input bytes from data0 and data1 + // to v2 and v3, and bitwise-xor them with the + // low bits of initial_crc in v0. Note that the + // the second xor is below this else-block as it + // is shared with the other branch. + MASK_H(data0, mask_end, v2); + MASK_L(data1, mask_end, v3); + *v0 = _mm_xor_si128(*v0, v2); + } + + *v0 = _mm_xor_si128(*v0, v3); + *v1 = _mm_alignr_epi8(*v1, *v0, 8); + } else +#endif + { + // There is more than 16 bytes of input. + const __m128i data1 = _mm_load_si128(aligned_buf); + const __m128i *end = (const __m128i*)( + (const char *)aligned_buf - 16 + size2); + aligned_buf++; + + MASK_LH(initial_crc, mask_start, *v0, *v1); + *v0 = _mm_xor_si128(*v0, data0); + *v1 = _mm_xor_si128(*v1, data1); + + while (aligned_buf < end) { + *v1 = _mm_xor_si128(*v1, _mm_clmulepi64_si128( + *v0, vfold16, 0x00)); + *v0 = _mm_xor_si128(*v1, _mm_clmulepi64_si128( + *v0, vfold16, 0x11)); + *v1 = _mm_load_si128(aligned_buf++); + } + + if (aligned_buf != end) { + MASK_H(*v0, mask_end, v2); + MASK_L(*v0, mask_end, *v0); + MASK_L(*v1, mask_end, v3); + *v1 = _mm_or_si128(v2, v3); + } + + *v1 = _mm_xor_si128(*v1, _mm_clmulepi64_si128( + *v0, vfold16, 0x00)); + *v0 = _mm_xor_si128(*v1, _mm_clmulepi64_si128( + *v0, vfold16, 0x11)); + *v1 = _mm_srli_si128(*v0, 8); + } +} + + +///////////////////// +// x86 CLMUL CRC32 // +///////////////////// + +/* +// These functions were used to generate the constants +// at the top of crc32_arch_optimized(). +static uint64_t +calc_lo(uint64_t p, uint64_t a, int n) +{ + uint64_t b = 0; int i; + for (i = 0; i < n; i++) { + b = b >> 1 | (a & 1) << (n - 1); + a = (a >> 1) ^ ((0 - (a & 1)) & p); + } + return b; +} + +// same as ~crc(&a, sizeof(a), ~0) +static uint64_t +calc_hi(uint64_t p, uint64_t a, int n) +{ + int i; + for (i = 0; i < n; i++) + a = (a >> 1) ^ ((0 - (a & 1)) & p); + return a; +} +*/ + +#ifdef BUILDING_CRC32_CLMUL + +crc_attr_target +crc_attr_no_sanitize_address +static uint32_t +crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc) +{ +#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS + // The code assumes that there is at least one byte of input. + if (size == 0) + return crc; +#endif + + // uint32_t poly = 0xedb88320; + const int64_t p = 0x1db710640; // p << 1 + const int64_t mu = 0x1f7011641; // calc_lo(p, p, 32) << 1 | 1 + const int64_t k5 = 0x163cd6124; // calc_hi(p, p, 32) << 1 + const int64_t k4 = 0x0ccaa009e; // calc_hi(p, p, 64) << 1 + const int64_t k3 = 0x1751997d0; // calc_hi(p, p, 128) << 1 + + const __m128i vfold4 = _mm_set_epi64x(mu, p); + const __m128i vfold8 = _mm_set_epi64x(0, k5); + const __m128i vfold16 = _mm_set_epi64x(k4, k3); + + __m128i v0, v1, v2; + + crc_simd_body(buf, size, &v0, &v1, vfold16, + _mm_cvtsi32_si128((int32_t)~crc)); + + v1 = _mm_xor_si128( + _mm_clmulepi64_si128(v0, vfold16, 0x10), v1); // xxx0 + v2 = _mm_shuffle_epi32(v1, 0xe7); // 0xx0 + v0 = _mm_slli_epi64(v1, 32); // [0] + v0 = _mm_clmulepi64_si128(v0, vfold8, 0x00); + v0 = _mm_xor_si128(v0, v2); // [1] [2] + v2 = _mm_clmulepi64_si128(v0, vfold4, 0x10); + v2 = _mm_clmulepi64_si128(v2, vfold4, 0x00); + v0 = _mm_xor_si128(v0, v2); // [2] + return ~(uint32_t)_mm_extract_epi32(v0, 2); +} +#endif // BUILDING_CRC32_CLMUL + + +///////////////////// +// x86 CLMUL CRC64 // +///////////////////// + +/* +// These functions were used to generate the constants +// at the top of crc64_arch_optimized(). +static uint64_t +calc_lo(uint64_t poly) +{ + uint64_t a = poly; + uint64_t b = 0; + + for (unsigned i = 0; i < 64; ++i) { + b = (b >> 1) | (a << 63); + a = (a >> 1) ^ (a & 1 ? poly : 0); + } + + return b; +} + +static uint64_t +calc_hi(uint64_t poly, uint64_t a) +{ + for (unsigned i = 0; i < 64; ++i) + a = (a >> 1) ^ (a & 1 ? poly : 0); + + return a; +} +*/ + +#ifdef BUILDING_CRC64_CLMUL + +// MSVC (VS2015 - VS2022) produces bad 32-bit x86 code from the CLMUL CRC +// code when optimizations are enabled (release build). According to the bug +// report, the ebx register is corrupted and the calculated result is wrong. +// Trying to workaround the problem with "__asm mov ebx, ebx" didn't help. +// The following pragma works and performance is still good. x86-64 builds +// and CRC32 CLMUL aren't affected by this problem. The problem does not +// happen in crc_simd_body() either (which is shared with CRC32 CLMUL anyway). +// +// NOTE: Another pragma after crc64_arch_optimized() restores +// the optimizations. If the #if condition here is updated, +// the other one must be updated too. +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \ + && defined(_M_IX86) +# pragma optimize("g", off) +#endif + +crc_attr_target +crc_attr_no_sanitize_address +static uint64_t +crc64_arch_optimized(const uint8_t *buf, size_t size, uint64_t crc) +{ +#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS + // The code assumes that there is at least one byte of input. + if (size == 0) + return crc; +#endif + + // const uint64_t poly = 0xc96c5795d7870f42; // CRC polynomial + const uint64_t p = 0x92d8af2baf0e1e85; // (poly << 1) | 1 + const uint64_t mu = 0x9c3e466c172963d5; // (calc_lo(poly) << 1) | 1 + const uint64_t k2 = 0xdabe95afc7875f40; // calc_hi(poly, 1) + const uint64_t k1 = 0xe05dd497ca393ae4; // calc_hi(poly, k2) + + const __m128i vfold8 = _mm_set_epi64x((int64_t)p, (int64_t)mu); + const __m128i vfold16 = _mm_set_epi64x((int64_t)k2, (int64_t)k1); + + __m128i v0, v1, v2; + +#if defined(__i386__) || defined(_M_IX86) + crc_simd_body(buf, size, &v0, &v1, vfold16, + _mm_set_epi64x(0, (int64_t)~crc)); +#else + // GCC and Clang would produce good code with _mm_set_epi64x + // but MSVC needs _mm_cvtsi64_si128 on x86-64. + crc_simd_body(buf, size, &v0, &v1, vfold16, + _mm_cvtsi64_si128((int64_t)~crc)); +#endif + + v1 = _mm_xor_si128(_mm_clmulepi64_si128(v0, vfold16, 0x10), v1); + v0 = _mm_clmulepi64_si128(v1, vfold8, 0x00); + v2 = _mm_clmulepi64_si128(v0, vfold8, 0x10); + v0 = _mm_xor_si128(_mm_xor_si128(v1, _mm_slli_si128(v0, 8)), v2); + +#if defined(__i386__) || defined(_M_IX86) + return ~(((uint64_t)(uint32_t)_mm_extract_epi32(v0, 3) << 32) | + (uint64_t)(uint32_t)_mm_extract_epi32(v0, 2)); +#else + return ~(uint64_t)_mm_extract_epi64(v0, 1); +#endif +} + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \ + && defined(_M_IX86) +# pragma optimize("", on) +#endif + +#endif // BUILDING_CRC64_CLMUL + + +// Inlining this function duplicates the function body in crc32_resolve() and +// crc64_resolve(), but this is acceptable because this is a tiny function. +static inline bool +is_arch_extension_supported(void) +{ + int success = 1; + uint32_t r[4]; // eax, ebx, ecx, edx + +#if defined(_MSC_VER) + // This needs <intrin.h> with MSVC. ICC has it as a built-in + // on all platforms. + __cpuid(r, 1); +#elif defined(HAVE_CPUID_H) + // Compared to just using __asm__ to run CPUID, this also checks + // that CPUID is supported and saves and restores ebx as that is + // needed with GCC < 5 with position-independent code (PIC). + success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]); +#else + // Just a fallback that shouldn't be needed. + __asm__("cpuid\n\t" + : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3]) + : "a"(1), "c"(0)); +#endif + + // Returns true if these are supported: + // CLMUL (bit 1 in ecx) + // SSSE3 (bit 9 in ecx) + // SSE4.1 (bit 19 in ecx) + const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19); + return success && (r[2] & ecx_mask) == ecx_mask; + + // Alternative methods that weren't used: + // - ICC's _may_i_use_cpu_feature: the other methods should work too. + // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul") + // + // CPUID decding is needed with MSVC anyway and older GCC. This keeps + // the feature checks in the build system simpler too. The nice thing + // about __builtin_cpu_supports would be that it generates very short + // code as is it only reads a variable set at startup but a few bytes + // doesn't matter here. +} diff --git a/contrib/libs/lzma/liblzma/check/sha256.c b/contrib/libs/lzma/liblzma/check/sha256.c index 6feb342565..bd0d280639 100644 --- a/contrib/libs/lzma/liblzma/check/sha256.c +++ b/contrib/libs/lzma/liblzma/check/sha256.c @@ -1,24 +1,17 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file sha256.c /// \brief SHA-256 -/// -/// \todo Crypto++ has x86 ASM optimizations. They use SSE so if they -/// are imported to liblzma, SSE instructions need to be used -/// conditionally to keep the code working on older boxes. // -// This code is based on the code found from 7-Zip, which has a modified -// version of the SHA-256 found from Crypto++ <https://www.cryptopp.com/>. -// The code was modified a little to fit into liblzma. +// The C code is based on the public domain SHA-256 code found from +// Crypto++ Library 5.5.1 released in 2007: https://www.cryptopp.com/ +// A few minor tweaks have been made in liblzma. // -// Authors: Kevin Springle -// Wei Dai -// Igor Pavlov +// Authors: Wei Dai // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "check.h" diff --git a/contrib/libs/lzma/liblzma/common/alone_decoder.c b/contrib/libs/lzma/liblzma/common/alone_decoder.c index 1dc85badf9..78af651578 100644 --- a/contrib/libs/lzma/liblzma/common/alone_decoder.c +++ b/contrib/libs/lzma/liblzma/common/alone_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file alone_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "alone_decoder.h" diff --git a/contrib/libs/lzma/liblzma/common/alone_decoder.h b/contrib/libs/lzma/liblzma/common/alone_decoder.h index dfa031aa77..61ee24d97f 100644 --- a/contrib/libs/lzma/liblzma/common/alone_decoder.h +++ b/contrib/libs/lzma/liblzma/common/alone_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file alone_decoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_ALONE_DECODER_H diff --git a/contrib/libs/lzma/liblzma/common/alone_encoder.c b/contrib/libs/lzma/liblzma/common/alone_encoder.c index 7d3812fa6e..21b039509a 100644 --- a/contrib/libs/lzma/liblzma/common/alone_encoder.c +++ b/contrib/libs/lzma/liblzma/common/alone_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file alone_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/auto_decoder.c b/contrib/libs/lzma/liblzma/common/auto_decoder.c index 2a5c0894d1..fdd520f905 100644 --- a/contrib/libs/lzma/liblzma/common/auto_decoder.c +++ b/contrib/libs/lzma/liblzma/common/auto_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file auto_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "stream_decoder.h" diff --git a/contrib/libs/lzma/liblzma/common/block_buffer_decoder.c b/contrib/libs/lzma/liblzma/common/block_buffer_decoder.c index b0ded90ddc..55566cd2f2 100644 --- a/contrib/libs/lzma/liblzma/common/block_buffer_decoder.c +++ b/contrib/libs/lzma/liblzma/common/block_buffer_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file block_buffer_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "block_decoder.h" diff --git a/contrib/libs/lzma/liblzma/common/block_buffer_encoder.c b/contrib/libs/lzma/liblzma/common/block_buffer_encoder.c index fdef02de89..df3b90e8a1 100644 --- a/contrib/libs/lzma/liblzma/common/block_buffer_encoder.c +++ b/contrib/libs/lzma/liblzma/common/block_buffer_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file block_buffer_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "block_buffer_encoder.h" diff --git a/contrib/libs/lzma/liblzma/common/block_buffer_encoder.h b/contrib/libs/lzma/liblzma/common/block_buffer_encoder.h index 653207f734..5274ac40d3 100644 --- a/contrib/libs/lzma/liblzma/common/block_buffer_encoder.h +++ b/contrib/libs/lzma/liblzma/common/block_buffer_encoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file block_buffer_encoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_BLOCK_BUFFER_ENCODER_H diff --git a/contrib/libs/lzma/liblzma/common/block_decoder.c b/contrib/libs/lzma/liblzma/common/block_decoder.c index be647d4855..2e369d316b 100644 --- a/contrib/libs/lzma/liblzma/common/block_decoder.c +++ b/contrib/libs/lzma/liblzma/common/block_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file block_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "block_decoder.h" diff --git a/contrib/libs/lzma/liblzma/common/block_decoder.h b/contrib/libs/lzma/liblzma/common/block_decoder.h index 718c5ced88..2cbf9ba6db 100644 --- a/contrib/libs/lzma/liblzma/common/block_decoder.h +++ b/contrib/libs/lzma/liblzma/common/block_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file block_decoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_BLOCK_DECODER_H diff --git a/contrib/libs/lzma/liblzma/common/block_encoder.c b/contrib/libs/lzma/liblzma/common/block_encoder.c index 4a136ef65e..ce8c1de694 100644 --- a/contrib/libs/lzma/liblzma/common/block_encoder.c +++ b/contrib/libs/lzma/liblzma/common/block_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file block_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "block_encoder.h" diff --git a/contrib/libs/lzma/liblzma/common/block_encoder.h b/contrib/libs/lzma/liblzma/common/block_encoder.h index bd97c186e5..b7dfe9a084 100644 --- a/contrib/libs/lzma/liblzma/common/block_encoder.h +++ b/contrib/libs/lzma/liblzma/common/block_encoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file block_encoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_BLOCK_ENCODER_H diff --git a/contrib/libs/lzma/liblzma/common/block_header_decoder.c b/contrib/libs/lzma/liblzma/common/block_header_decoder.c index c4935dcf46..f0b2fbe54d 100644 --- a/contrib/libs/lzma/liblzma/common/block_header_decoder.c +++ b/contrib/libs/lzma/liblzma/common/block_header_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file block_header_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/block_header_encoder.c b/contrib/libs/lzma/liblzma/common/block_header_encoder.c index 160425d27a..45e57a26ab 100644 --- a/contrib/libs/lzma/liblzma/common/block_header_encoder.c +++ b/contrib/libs/lzma/liblzma/common/block_header_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file block_header_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/block_util.c b/contrib/libs/lzma/liblzma/common/block_util.c index acb311142c..191f6d444a 100644 --- a/contrib/libs/lzma/liblzma/common/block_util.c +++ b/contrib/libs/lzma/liblzma/common/block_util.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file block_util.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/common.c b/contrib/libs/lzma/liblzma/common/common.c index adb50d785d..cc0e06a51b 100644 --- a/contrib/libs/lzma/liblzma/common/common.c +++ b/contrib/libs/lzma/liblzma/common/common.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file common.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/common.h b/contrib/libs/lzma/liblzma/common/common.h index 378923e401..20af32f6d6 100644 --- a/contrib/libs/lzma/liblzma/common/common.h +++ b/contrib/libs/lzma/liblzma/common/common.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file common.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_COMMON_H diff --git a/contrib/libs/lzma/liblzma/common/easy_buffer_encoder.c b/contrib/libs/lzma/liblzma/common/easy_buffer_encoder.c index 48eb56f5cc..da610cea6b 100644 --- a/contrib/libs/lzma/liblzma/common/easy_buffer_encoder.c +++ b/contrib/libs/lzma/liblzma/common/easy_buffer_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file easy_buffer_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "easy_preset.h" diff --git a/contrib/libs/lzma/liblzma/common/easy_decoder_memusage.c b/contrib/libs/lzma/liblzma/common/easy_decoder_memusage.c index 20bcd5b717..0c76f10033 100644 --- a/contrib/libs/lzma/liblzma/common/easy_decoder_memusage.c +++ b/contrib/libs/lzma/liblzma/common/easy_decoder_memusage.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file easy_decoder_memusage.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "easy_preset.h" diff --git a/contrib/libs/lzma/liblzma/common/easy_encoder.c b/contrib/libs/lzma/liblzma/common/easy_encoder.c index 5cb492dd06..8dfe29610f 100644 --- a/contrib/libs/lzma/liblzma/common/easy_encoder.c +++ b/contrib/libs/lzma/liblzma/common/easy_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file easy_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "easy_preset.h" diff --git a/contrib/libs/lzma/liblzma/common/easy_encoder_memusage.c b/contrib/libs/lzma/liblzma/common/easy_encoder_memusage.c index e910575842..1184ac6654 100644 --- a/contrib/libs/lzma/liblzma/common/easy_encoder_memusage.c +++ b/contrib/libs/lzma/liblzma/common/easy_encoder_memusage.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file easy_encoder_memusage.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "easy_preset.h" diff --git a/contrib/libs/lzma/liblzma/common/easy_preset.c b/contrib/libs/lzma/liblzma/common/easy_preset.c index 2f9859860a..7908a2bb73 100644 --- a/contrib/libs/lzma/liblzma/common/easy_preset.c +++ b/contrib/libs/lzma/liblzma/common/easy_preset.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file easy_preset.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "easy_preset.h" diff --git a/contrib/libs/lzma/liblzma/common/easy_preset.h b/contrib/libs/lzma/liblzma/common/easy_preset.h index 382ade8940..4ef6d044ad 100644 --- a/contrib/libs/lzma/liblzma/common/easy_preset.h +++ b/contrib/libs/lzma/liblzma/common/easy_preset.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file easy_preset.h @@ -5,11 +7,11 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// +#ifndef LZMA_EASY_PRESET_H +#define LZMA_EASY_PRESET_H + #include "common.h" @@ -30,3 +32,5 @@ typedef struct { /// Set *easy to the settings given by the preset. Returns true on error, /// false on success. extern bool lzma_easy_preset(lzma_options_easy *easy, uint32_t preset); + +#endif diff --git a/contrib/libs/lzma/liblzma/common/file_info.c b/contrib/libs/lzma/liblzma/common/file_info.c index 799bb024fe..7c85084a70 100644 --- a/contrib/libs/lzma/liblzma/common/file_info.c +++ b/contrib/libs/lzma/liblzma/common/file_info.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file file_info.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "index_decoder.h" diff --git a/contrib/libs/lzma/liblzma/common/filter_buffer_decoder.c b/contrib/libs/lzma/liblzma/common/filter_buffer_decoder.c index 6620986eea..cc0d88cc71 100644 --- a/contrib/libs/lzma/liblzma/common/filter_buffer_decoder.c +++ b/contrib/libs/lzma/liblzma/common/filter_buffer_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file filter_buffer_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "filter_decoder.h" @@ -24,7 +23,7 @@ lzma_raw_buffer_decode( || out_pos == NULL || *out_pos > out_size) return LZMA_PROG_ERROR; - // Initialize the decoer. + // Initialize the decoder. lzma_next_coder next = LZMA_NEXT_CODER_INIT; return_if_error(lzma_raw_decoder_init(&next, allocator, filters)); diff --git a/contrib/libs/lzma/liblzma/common/filter_buffer_encoder.c b/contrib/libs/lzma/liblzma/common/filter_buffer_encoder.c index dda18e3d8e..7fb8922ae9 100644 --- a/contrib/libs/lzma/liblzma/common/filter_buffer_encoder.c +++ b/contrib/libs/lzma/liblzma/common/filter_buffer_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file filter_buffer_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "filter_encoder.h" diff --git a/contrib/libs/lzma/liblzma/common/filter_common.c b/contrib/libs/lzma/liblzma/common/filter_common.c index fa0927cf9b..d15d9cc94f 100644 --- a/contrib/libs/lzma/liblzma/common/filter_common.c +++ b/contrib/libs/lzma/liblzma/common/filter_common.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file filter_common.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "filter_common.h" @@ -122,6 +121,15 @@ static const struct { .changes_size = false, }, #endif +#if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV) + { + .id = LZMA_FILTER_RISCV, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) { .id = LZMA_FILTER_DELTA, @@ -145,7 +153,7 @@ lzma_filters_copy(const lzma_filter *src, lzma_filter *real_dest, return LZMA_PROG_ERROR; // Use a temporary destination so that the real destination - // will never be modied if an error occurs. + // will never be modified if an error occurs. lzma_filter dest[LZMA_FILTERS_MAX + 1]; lzma_ret ret; diff --git a/contrib/libs/lzma/liblzma/common/filter_common.h b/contrib/libs/lzma/liblzma/common/filter_common.h index 2e47bb69f7..95f9fe2701 100644 --- a/contrib/libs/lzma/liblzma/common/filter_common.h +++ b/contrib/libs/lzma/liblzma/common/filter_common.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file filter_common.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_FILTER_COMMON_H diff --git a/contrib/libs/lzma/liblzma/common/filter_decoder.c b/contrib/libs/lzma/liblzma/common/filter_decoder.c index fa53f5bdba..cbdeb5858f 100644 --- a/contrib/libs/lzma/liblzma/common/filter_decoder.c +++ b/contrib/libs/lzma/liblzma/common/filter_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file filter_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "filter_decoder.h" @@ -121,6 +120,14 @@ static const lzma_filter_decoder decoders[] = { .props_decode = &lzma_simple_props_decode, }, #endif +#ifdef HAVE_DECODER_RISCV + { + .id = LZMA_FILTER_RISCV, + .init = &lzma_simple_riscv_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif #ifdef HAVE_DECODER_DELTA { .id = LZMA_FILTER_DELTA, @@ -143,6 +150,16 @@ decoder_find(lzma_vli id) } +// lzma_filter_coder begins with the same members as lzma_filter_decoder. +// This function is a wrapper with a type that is compatible with the +// typedef of lzma_filter_find in filter_common.h. +static const lzma_filter_coder * +coder_find(lzma_vli id) +{ + return (const lzma_filter_coder *)decoder_find(id); +} + + extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id) { @@ -155,7 +172,7 @@ lzma_raw_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options) { return lzma_raw_coder_init(next, allocator, - options, (lzma_filter_find)(&decoder_find), false); + options, &coder_find, false); } @@ -174,8 +191,7 @@ lzma_raw_decoder(lzma_stream *strm, const lzma_filter *options) extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters) { - return lzma_raw_coder_memusage( - (lzma_filter_find)(&decoder_find), filters); + return lzma_raw_coder_memusage(&coder_find, filters); } diff --git a/contrib/libs/lzma/liblzma/common/filter_decoder.h b/contrib/libs/lzma/liblzma/common/filter_decoder.h index 2dac602828..e610bc1f44 100644 --- a/contrib/libs/lzma/liblzma/common/filter_decoder.h +++ b/contrib/libs/lzma/liblzma/common/filter_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file filter_decoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_FILTER_DECODER_H diff --git a/contrib/libs/lzma/liblzma/common/filter_encoder.c b/contrib/libs/lzma/liblzma/common/filter_encoder.c index c7777dfef8..bc39444898 100644 --- a/contrib/libs/lzma/liblzma/common/filter_encoder.c +++ b/contrib/libs/lzma/liblzma/common/filter_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file filter_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "filter_encoder.h" @@ -33,7 +32,8 @@ typedef struct { /// Calculates the recommended Uncompressed Size for .xz Blocks to /// which the input data can be split to make multithreaded /// encoding possible. If this is NULL, it is assumed that - /// the encoder is fast enough with single thread. + /// the encoder is fast enough with single thread. If the options + /// are invalid, UINT64_MAX is returned. uint64_t (*block_size)(const void *options); /// Tells the size of the Filter Properties field. If options are @@ -158,6 +158,16 @@ static const lzma_filter_encoder encoders[] = { .props_encode = &lzma_simple_props_encode, }, #endif +#ifdef HAVE_ENCODER_RISCV + { + .id = LZMA_FILTER_RISCV, + .init = &lzma_simple_riscv_encoder_init, + .memusage = NULL, + .block_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif #ifdef HAVE_ENCODER_DELTA { .id = LZMA_FILTER_DELTA, @@ -183,6 +193,16 @@ encoder_find(lzma_vli id) } +// lzma_filter_coder begins with the same members as lzma_filter_encoder. +// This function is a wrapper with a type that is compatible with the +// typedef of lzma_filter_find in filter_common.h. +static const lzma_filter_coder * +coder_find(lzma_vli id) +{ + return (const lzma_filter_coder *)encoder_find(id); +} + + extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id) { @@ -222,7 +242,7 @@ lzma_raw_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *filters) { return lzma_raw_coder_init(next, allocator, - filters, (lzma_filter_find)(&encoder_find), true); + filters, &coder_find, true); } @@ -230,7 +250,7 @@ extern LZMA_API(lzma_ret) lzma_raw_encoder(lzma_stream *strm, const lzma_filter *filters) { lzma_next_strm_init(lzma_raw_coder_init, strm, filters, - (lzma_filter_find)(&encoder_find), true); + &coder_find, true); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; @@ -243,31 +263,33 @@ lzma_raw_encoder(lzma_stream *strm, const lzma_filter *filters) extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters) { - return lzma_raw_coder_memusage( - (lzma_filter_find)(&encoder_find), filters); + return lzma_raw_coder_memusage(&coder_find, filters); } -extern uint64_t +extern LZMA_API(uint64_t) lzma_mt_block_size(const lzma_filter *filters) { + if (filters == NULL) + return UINT64_MAX; + uint64_t max = 0; for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { const lzma_filter_encoder *const fe = encoder_find(filters[i].id); + if (fe == NULL) + return UINT64_MAX; + if (fe->block_size != NULL) { const uint64_t size = fe->block_size(filters[i].options); - if (size == 0) - return 0; - if (size > max) max = size; } } - return max; + return max == 0 ? UINT64_MAX : max; } diff --git a/contrib/libs/lzma/liblzma/common/filter_encoder.h b/contrib/libs/lzma/liblzma/common/filter_encoder.h index f1d5683fe7..88f2dafa43 100644 --- a/contrib/libs/lzma/liblzma/common/filter_encoder.h +++ b/contrib/libs/lzma/liblzma/common/filter_encoder.h @@ -1,13 +1,12 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // -/// \file filter_encoder.c +/// \file filter_encoder.h /// \brief Filter ID mapping to filter-specific functions // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_FILTER_ENCODER_H @@ -16,10 +15,6 @@ #include "common.h" -// FIXME: Might become a part of the public API. -extern uint64_t lzma_mt_block_size(const lzma_filter *filters); - - extern lzma_ret lzma_raw_encoder_init( lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *filters); diff --git a/contrib/libs/lzma/liblzma/common/filter_flags_decoder.c b/contrib/libs/lzma/liblzma/common/filter_flags_decoder.c index ddfb085943..0f5d204d47 100644 --- a/contrib/libs/lzma/liblzma/common/filter_flags_decoder.c +++ b/contrib/libs/lzma/liblzma/common/filter_flags_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file filter_flags_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "filter_decoder.h" diff --git a/contrib/libs/lzma/liblzma/common/filter_flags_encoder.c b/contrib/libs/lzma/liblzma/common/filter_flags_encoder.c index b57b9fd80b..e1d65884fb 100644 --- a/contrib/libs/lzma/liblzma/common/filter_flags_encoder.c +++ b/contrib/libs/lzma/liblzma/common/filter_flags_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file filter_flags_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "filter_encoder.h" diff --git a/contrib/libs/lzma/liblzma/common/hardware_cputhreads.c b/contrib/libs/lzma/liblzma/common/hardware_cputhreads.c index 5d246d2cc0..4ce852b42c 100644 --- a/contrib/libs/lzma/liblzma/common/hardware_cputhreads.c +++ b/contrib/libs/lzma/liblzma/common/hardware_cputhreads.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file hardware_cputhreads.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/hardware_physmem.c b/contrib/libs/lzma/liblzma/common/hardware_physmem.c index a2bbbe29d4..1bc34864e8 100644 --- a/contrib/libs/lzma/liblzma/common/hardware_physmem.c +++ b/contrib/libs/lzma/liblzma/common/hardware_physmem.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file hardware_physmem.c @@ -5,9 +7,6 @@ // // Author: Jonathan Nieder // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/index.c b/contrib/libs/lzma/liblzma/common/index.c index 8a35f4398d..6add6a6835 100644 --- a/contrib/libs/lzma/liblzma/common/index.c +++ b/contrib/libs/lzma/liblzma/common/index.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file index.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/index.h b/contrib/libs/lzma/liblzma/common/index.h index 7b27d7004c..007e1188f2 100644 --- a/contrib/libs/lzma/liblzma/common/index.h +++ b/contrib/libs/lzma/liblzma/common/index.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file index.h @@ -12,9 +14,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_INDEX_H diff --git a/contrib/libs/lzma/liblzma/common/index_decoder.c b/contrib/libs/lzma/liblzma/common/index_decoder.c index 19a31b3e94..4bcb306921 100644 --- a/contrib/libs/lzma/liblzma/common/index_decoder.c +++ b/contrib/libs/lzma/liblzma/common/index_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file index_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "index_decoder.h" @@ -306,6 +305,12 @@ lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, extern LZMA_API(lzma_ret) lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) { + // If i isn't NULL, *i must always be initialized due to + // the wording in the API docs. This way it is initialized + // if we return LZMA_PROG_ERROR due to strm == NULL. + if (i != NULL) + *i = NULL; + lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit); strm->internal->supported_actions[LZMA_RUN] = true; @@ -320,6 +325,11 @@ lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size) { + // If i isn't NULL, *i must always be initialized due to + // the wording in the API docs. + if (i != NULL) + *i = NULL; + // Sanity checks if (i == NULL || memlimit == NULL || in == NULL || in_pos == NULL || *in_pos > in_size) diff --git a/contrib/libs/lzma/liblzma/common/index_decoder.h b/contrib/libs/lzma/liblzma/common/index_decoder.h index 3fec483331..5351d2f0df 100644 --- a/contrib/libs/lzma/liblzma/common/index_decoder.h +++ b/contrib/libs/lzma/liblzma/common/index_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file index_decoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_INDEX_DECODER_H diff --git a/contrib/libs/lzma/liblzma/common/index_encoder.c b/contrib/libs/lzma/liblzma/common/index_encoder.c index 204490cc19..ecc299c015 100644 --- a/contrib/libs/lzma/liblzma/common/index_encoder.c +++ b/contrib/libs/lzma/liblzma/common/index_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file index_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "index_encoder.h" diff --git a/contrib/libs/lzma/liblzma/common/index_encoder.h b/contrib/libs/lzma/liblzma/common/index_encoder.h index 4d55cd1047..29ba110669 100644 --- a/contrib/libs/lzma/liblzma/common/index_encoder.h +++ b/contrib/libs/lzma/liblzma/common/index_encoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file index_encoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_INDEX_ENCODER_H diff --git a/contrib/libs/lzma/liblzma/common/index_hash.c b/contrib/libs/lzma/liblzma/common/index_hash.c index 52c3d65077..caa5967ca4 100644 --- a/contrib/libs/lzma/liblzma/common/index_hash.c +++ b/contrib/libs/lzma/liblzma/common/index_hash.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file index_hash.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/lzip_decoder.c b/contrib/libs/lzma/liblzma/common/lzip_decoder.c index 88cc7ffd23..651a0ae712 100644 --- a/contrib/libs/lzma/liblzma/common/lzip_decoder.c +++ b/contrib/libs/lzma/liblzma/common/lzip_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzip_decoder.c @@ -6,9 +8,6 @@ // Author: Michał Górny // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lzip_decoder.h" diff --git a/contrib/libs/lzma/liblzma/common/lzip_decoder.h b/contrib/libs/lzma/liblzma/common/lzip_decoder.h index 33a01c352c..0e1f7bebd4 100644 --- a/contrib/libs/lzma/liblzma/common/lzip_decoder.h +++ b/contrib/libs/lzma/liblzma/common/lzip_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzip_decoder.h @@ -5,9 +7,6 @@ // // Author: Michał Górny // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZIP_DECODER_H diff --git a/contrib/libs/lzma/liblzma/common/memcmplen.h b/contrib/libs/lzma/liblzma/common/memcmplen.h index 99d9c519cc..394a4856dd 100644 --- a/contrib/libs/lzma/liblzma/common/memcmplen.h +++ b/contrib/libs/lzma/liblzma/common/memcmplen.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file memcmplen.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_MEMCMPLEN_H @@ -24,7 +23,8 @@ // can use the intrinsics without the header file. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ && defined(_MSC_VER) \ - && defined(_M_X64) \ + && (defined(_M_X64) \ + || defined(_M_ARM64) || defined(_M_ARM64EC)) \ && !defined(__INTEL_COMPILER) # include <intrin.h> #endif @@ -57,20 +57,35 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, assert(limit <= UINT32_MAX / 2); #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ - && ((TUKLIB_GNUC_REQ(3, 4) && defined(__x86_64__)) \ + && (((TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) \ + && (defined(__x86_64__) \ + || defined(__aarch64__))) \ || (defined(__INTEL_COMPILER) && defined(__x86_64__)) \ || (defined(__INTEL_COMPILER) && defined(_M_X64)) \ - || (defined(_MSC_VER) && defined(_M_X64))) - // I keep this x86-64 only for now since that's where I know this - // to be a good method. This may be fine on other 64-bit CPUs too. - // On big endian one should use xor instead of subtraction and switch - // to __builtin_clzll(). + || (defined(_MSC_VER) && (defined(_M_X64) \ + || defined(_M_ARM64) || defined(_M_ARM64EC)))) + // This is only for x86-64 and ARM64 for now. This might be fine on + // other 64-bit processors too. On big endian one should use xor + // instead of subtraction and switch to __builtin_clzll(). + // + // Reasons to use subtraction instead of xor: + // + // - On some x86-64 processors (Intel Sandy Bridge to Tiger Lake), + // sub+jz and sub+jnz can be fused but xor+jz or xor+jnz cannot. + // Thus using subtraction has potential to be a tiny amount faster + // since the code checks if the quotient is non-zero. + // + // - Some processors (Intel Pentium 4) used to have more ALU + // resources for add/sub instructions than and/or/xor. + // + // The processor info is based on Agner Fog's microarchitecture.pdf + // version 2023-05-26. https://www.agner.org/optimize/ #define LZMA_MEMCMPLEN_EXTRA 8 while (len < limit) { const uint64_t x = read64ne(buf1 + len) - read64ne(buf2 + len); if (x != 0) { // MSVC or Intel C compiler on Windows -# if (defined(_MSC_VER) || defined(__INTEL_COMPILER)) && defined(_M_X64) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) unsigned long tmp; _BitScanForward64(&tmp, x); len += (uint32_t)tmp >> 3; diff --git a/contrib/libs/lzma/liblzma/common/microlzma_decoder.c b/contrib/libs/lzma/liblzma/common/microlzma_decoder.c index e473373daa..882cb2c808 100644 --- a/contrib/libs/lzma/liblzma/common/microlzma_decoder.c +++ b/contrib/libs/lzma/liblzma/common/microlzma_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file microlzma_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lzma_decoder.h" diff --git a/contrib/libs/lzma/liblzma/common/microlzma_encoder.c b/contrib/libs/lzma/liblzma/common/microlzma_encoder.c index a787ca25b8..45ec0b12f4 100644 --- a/contrib/libs/lzma/liblzma/common/microlzma_encoder.c +++ b/contrib/libs/lzma/liblzma/common/microlzma_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file microlzma_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lzma_encoder.h" diff --git a/contrib/libs/lzma/liblzma/common/outqueue.c b/contrib/libs/lzma/liblzma/common/outqueue.c index 71e8648a29..eb018eb42b 100644 --- a/contrib/libs/lzma/liblzma/common/outqueue.c +++ b/contrib/libs/lzma/liblzma/common/outqueue.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file outqueue.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "outqueue.h" diff --git a/contrib/libs/lzma/liblzma/common/outqueue.h b/contrib/libs/lzma/liblzma/common/outqueue.h index 596911e95e..25f071977a 100644 --- a/contrib/libs/lzma/liblzma/common/outqueue.h +++ b/contrib/libs/lzma/liblzma/common/outqueue.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file outqueue.h @@ -5,11 +7,11 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// +#ifndef LZMA_OUTQUEUE_H +#define LZMA_OUTQUEUE_H + #include "common.h" @@ -252,3 +254,5 @@ lzma_outq_outbuf_memusage(size_t buf_size) assert(buf_size <= SIZE_MAX - sizeof(lzma_outbuf)); return sizeof(lzma_outbuf) + buf_size; } + +#endif diff --git a/contrib/libs/lzma/liblzma/common/stream_buffer_decoder.c b/contrib/libs/lzma/liblzma/common/stream_buffer_decoder.c index b9745b5dbe..c4f91fb498 100644 --- a/contrib/libs/lzma/liblzma/common/stream_buffer_decoder.c +++ b/contrib/libs/lzma/liblzma/common/stream_buffer_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_buffer_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "stream_decoder.h" diff --git a/contrib/libs/lzma/liblzma/common/stream_buffer_encoder.c b/contrib/libs/lzma/liblzma/common/stream_buffer_encoder.c index 73157590e6..04d5869594 100644 --- a/contrib/libs/lzma/liblzma/common/stream_buffer_encoder.c +++ b/contrib/libs/lzma/liblzma/common/stream_buffer_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_buffer_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/stream_decoder.c b/contrib/libs/lzma/liblzma/common/stream_decoder.c index 64283812f2..7f42684136 100644 --- a/contrib/libs/lzma/liblzma/common/stream_decoder.c +++ b/contrib/libs/lzma/liblzma/common/stream_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "stream_decoder.h" diff --git a/contrib/libs/lzma/liblzma/common/stream_decoder.h b/contrib/libs/lzma/liblzma/common/stream_decoder.h index c13c6ba127..5803715374 100644 --- a/contrib/libs/lzma/liblzma/common/stream_decoder.h +++ b/contrib/libs/lzma/liblzma/common/stream_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_decoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_STREAM_DECODER_H diff --git a/contrib/libs/lzma/liblzma/common/stream_decoder_mt.c b/contrib/libs/lzma/liblzma/common/stream_decoder_mt.c index 76212b46da..244624a479 100644 --- a/contrib/libs/lzma/liblzma/common/stream_decoder_mt.c +++ b/contrib/libs/lzma/liblzma/common/stream_decoder_mt.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_decoder_mt.c @@ -6,9 +8,6 @@ // Authors: Sebastian Andrzej Siewior // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/stream_encoder.c b/contrib/libs/lzma/liblzma/common/stream_encoder.c index ee92046018..e7e5b3fce7 100644 --- a/contrib/libs/lzma/liblzma/common/stream_encoder.c +++ b/contrib/libs/lzma/liblzma/common/stream_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "block_encoder.h" diff --git a/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c b/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c index f64de9bdbc..f0fef15233 100644 --- a/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c +++ b/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_encoder_mt.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "filter_encoder.h" @@ -979,20 +978,18 @@ get_options(const lzma_mt *options, lzma_options_easy *opt_easy, *filters = opt_easy->filters; } - // Block size - if (options->block_size > 0) { - if (options->block_size > BLOCK_SIZE_MAX) - return LZMA_OPTIONS_ERROR; - + // If the Block size is not set, determine it from the filter chain. + if (options->block_size > 0) *block_size = options->block_size; - } else { - // Determine the Block size from the filter chain. + else *block_size = lzma_mt_block_size(*filters); - if (*block_size == 0) - return LZMA_OPTIONS_ERROR; - assert(*block_size <= BLOCK_SIZE_MAX); - } + // UINT64_MAX > BLOCK_SIZE_MAX, so the second condition + // should be optimized out by any reasonable compiler. + // The second condition should be there in the unlikely event that + // the macros change and UINT64_MAX < BLOCK_SIZE_MAX. + if (*block_size > BLOCK_SIZE_MAX || *block_size == UINT64_MAX) + return LZMA_OPTIONS_ERROR; // Calculate the maximum amount output that a single output buffer // may need to hold. This is the same as the maximum total size of diff --git a/contrib/libs/lzma/liblzma/common/stream_flags_common.c b/contrib/libs/lzma/liblzma/common/stream_flags_common.c index fbe8eb8abd..41b8dcb70d 100644 --- a/contrib/libs/lzma/liblzma/common/stream_flags_common.c +++ b/contrib/libs/lzma/liblzma/common/stream_flags_common.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_common.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "stream_flags_common.h" diff --git a/contrib/libs/lzma/liblzma/common/stream_flags_common.h b/contrib/libs/lzma/liblzma/common/stream_flags_common.h index 84e96ba1ff..28729dbcb6 100644 --- a/contrib/libs/lzma/liblzma/common/stream_flags_common.h +++ b/contrib/libs/lzma/liblzma/common/stream_flags_common.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_common.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_STREAM_FLAGS_COMMON_H diff --git a/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c b/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c index b8d263ba44..522c98b6fd 100644 --- a/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c +++ b/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "stream_flags_common.h" diff --git a/contrib/libs/lzma/liblzma/common/stream_flags_encoder.c b/contrib/libs/lzma/liblzma/common/stream_flags_encoder.c index b98ab17c45..f94b5cd0a2 100644 --- a/contrib/libs/lzma/liblzma/common/stream_flags_encoder.c +++ b/contrib/libs/lzma/liblzma/common/stream_flags_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "stream_flags_common.h" diff --git a/contrib/libs/lzma/liblzma/common/string_conversion.c b/contrib/libs/lzma/liblzma/common/string_conversion.c index d2c1e80936..c899783c64 100644 --- a/contrib/libs/lzma/liblzma/common/string_conversion.c +++ b/contrib/libs/lzma/liblzma/common/string_conversion.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file string_conversion.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "filter_common.h" @@ -218,12 +217,14 @@ typedef struct { uint16_t offset; union { + // NVHPC has problems with unions that contain pointers that + // are not the first members, so keep "map" at the top. + const name_value_map *map; + struct { uint32_t min; uint32_t max; } range; - - const name_value_map *map; } u; } option_map; @@ -250,7 +251,9 @@ static const char *parse_options(const char **const str, const char *str_end, || defined(HAVE_ENCODER_IA64) \ || defined(HAVE_DECODER_IA64) \ || defined(HAVE_ENCODER_SPARC) \ - || defined(HAVE_DECODER_SPARC) + || defined(HAVE_DECODER_SPARC) \ + || defined(HAVE_ENCODER_RISCV) \ + || defined(HAVE_DECODER_RISCV) static const option_map bcj_optmap[] = { { .name = "start", @@ -509,6 +512,11 @@ static const struct { &parse_bcj, bcj_optmap, 1, 1, true }, #endif +#if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV) + { "riscv", sizeof(lzma_options_bcj), LZMA_FILTER_RISCV, + &parse_bcj, bcj_optmap, 1, 1, true }, +#endif + #if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC) { "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC, &parse_bcj, bcj_optmap, 1, 1, true }, @@ -994,6 +1002,12 @@ extern LZMA_API(const char *) lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters, uint32_t flags, const lzma_allocator *allocator) { + // If error_pos isn't NULL, *error_pos must always be set. + // liblzma <= 5.4.6 and <= 5.6.1 have a bug and don't do this + // when str == NULL or filters == NULL or flags are unsupported. + if (error_pos != NULL) + *error_pos = 0; + if (str == NULL || filters == NULL) return "Unexpected NULL pointer argument(s) " "to lzma_str_to_filters()"; diff --git a/contrib/libs/lzma/liblzma/common/vli_decoder.c b/contrib/libs/lzma/liblzma/common/vli_decoder.c index af2799d1fb..3254ccc35b 100644 --- a/contrib/libs/lzma/liblzma/common/vli_decoder.c +++ b/contrib/libs/lzma/liblzma/common/vli_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file vli_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/vli_encoder.c b/contrib/libs/lzma/liblzma/common/vli_encoder.c index f8642694e2..3859006a94 100644 --- a/contrib/libs/lzma/liblzma/common/vli_encoder.c +++ b/contrib/libs/lzma/liblzma/common/vli_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file vli_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/common/vli_size.c b/contrib/libs/lzma/liblzma/common/vli_size.c index ec1b4fa488..c8cb2ec10a 100644 --- a/contrib/libs/lzma/liblzma/common/vli_size.c +++ b/contrib/libs/lzma/liblzma/common/vli_size.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file vli_size.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/delta/delta_common.c b/contrib/libs/lzma/liblzma/delta/delta_common.c index 4768201d1a..5dbe253b4b 100644 --- a/contrib/libs/lzma/liblzma/delta/delta_common.c +++ b/contrib/libs/lzma/liblzma/delta/delta_common.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file delta_common.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "delta_common.h" diff --git a/contrib/libs/lzma/liblzma/delta/delta_common.h b/contrib/libs/lzma/liblzma/delta/delta_common.h index 7e7e1baaf6..bd09127697 100644 --- a/contrib/libs/lzma/liblzma/delta/delta_common.h +++ b/contrib/libs/lzma/liblzma/delta/delta_common.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file delta_common.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_DELTA_COMMON_H diff --git a/contrib/libs/lzma/liblzma/delta/delta_decoder.c b/contrib/libs/lzma/liblzma/delta/delta_decoder.c index 77cf65cc76..9f0d49ca41 100644 --- a/contrib/libs/lzma/liblzma/delta/delta_decoder.c +++ b/contrib/libs/lzma/liblzma/delta/delta_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file delta_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "delta_decoder.h" @@ -26,6 +25,11 @@ decode_buffer(lzma_delta_coder *coder, uint8_t *buffer, size_t size) } +// For an unknown reason NVIDIA HPC Compiler needs this pragma +// to produce working code. +#ifdef __NVCOMPILER +# pragma routine novector +#endif static lzma_ret delta_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, diff --git a/contrib/libs/lzma/liblzma/delta/delta_decoder.h b/contrib/libs/lzma/liblzma/delta/delta_decoder.h index ad89cc6597..e2268ed44e 100644 --- a/contrib/libs/lzma/liblzma/delta/delta_decoder.h +++ b/contrib/libs/lzma/liblzma/delta/delta_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file delta_decoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_DELTA_DECODER_H diff --git a/contrib/libs/lzma/liblzma/delta/delta_encoder.c b/contrib/libs/lzma/liblzma/delta/delta_encoder.c index 056bf7468e..ba4a50b1f4 100644 --- a/contrib/libs/lzma/liblzma/delta/delta_encoder.c +++ b/contrib/libs/lzma/liblzma/delta/delta_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file delta_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "delta_encoder.h" diff --git a/contrib/libs/lzma/liblzma/delta/delta_encoder.h b/contrib/libs/lzma/liblzma/delta/delta_encoder.h index 4ab9847851..735f0ed009 100644 --- a/contrib/libs/lzma/liblzma/delta/delta_encoder.h +++ b/contrib/libs/lzma/liblzma/delta/delta_encoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file delta_encoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_DELTA_ENCODER_H diff --git a/contrib/libs/lzma/liblzma/delta/delta_private.h b/contrib/libs/lzma/liblzma/delta/delta_private.h index 0d6cb38661..e54721a846 100644 --- a/contrib/libs/lzma/liblzma/delta/delta_private.h +++ b/contrib/libs/lzma/liblzma/delta/delta_private.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file delta_private.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_DELTA_PRIVATE_H diff --git a/contrib/libs/lzma/liblzma/lz/lz_decoder.c b/contrib/libs/lzma/liblzma/lz/lz_decoder.c index 06c95c1137..92913f225a 100644 --- a/contrib/libs/lzma/liblzma/lz/lz_decoder.c +++ b/contrib/libs/lzma/liblzma/lz/lz_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_decoder.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// // liblzma supports multiple LZ77-based filters. The LZ part is shared @@ -54,9 +53,10 @@ typedef struct { static void lz_decoder_reset(lzma_coder *coder) { - coder->dict.pos = 0; + coder->dict.pos = 2 * LZ_DICT_REPEAT_MAX; coder->dict.full = 0; - coder->dict.buf[coder->dict.size - 1] = '\0'; + coder->dict.buf[2 * LZ_DICT_REPEAT_MAX - 1] = '\0'; + coder->dict.has_wrapped = false; coder->dict.need_reset = false; return; } @@ -70,8 +70,15 @@ decode_buffer(lzma_coder *coder, { while (true) { // Wrap the dictionary if needed. - if (coder->dict.pos == coder->dict.size) - coder->dict.pos = 0; + if (coder->dict.pos == coder->dict.size) { + // See the comment of #define LZ_DICT_REPEAT_MAX. + coder->dict.pos = LZ_DICT_REPEAT_MAX; + coder->dict.has_wrapped = true; + memcpy(coder->dict.buf, coder->dict.buf + + coder->dict.size + - LZ_DICT_REPEAT_MAX, + LZ_DICT_REPEAT_MAX); + } // Store the current dictionary position. It is needed to know // where to start copying to the out[] buffer. @@ -253,21 +260,31 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, // dictionary to the output buffer, since applications are // recommended to give aligned buffers to liblzma. // + // Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is + // needed for alloc_size. + // // Avoid integer overflow. - if (lz_options.dict_size > SIZE_MAX - 15) + if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX) return LZMA_MEM_ERROR; lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15)); + // Reserve extra space as explained in the comment + // of #define LZ_DICT_REPEAT_MAX. + const size_t alloc_size + = lz_options.dict_size + 2 * LZ_DICT_REPEAT_MAX; + // Allocate and initialize the dictionary. - if (coder->dict.size != lz_options.dict_size) { + if (coder->dict.size != alloc_size) { lzma_free(coder->dict.buf, allocator); - coder->dict.buf - = lzma_alloc(lz_options.dict_size, allocator); + coder->dict.buf = lzma_alloc(alloc_size, allocator); if (coder->dict.buf == NULL) return LZMA_MEM_ERROR; - coder->dict.size = lz_options.dict_size; + // NOTE: Yes, alloc_size, not lz_options.dict_size. The way + // coder->dict.full is updated will take care that we will + // still reject distances larger than lz_options.dict_size. + coder->dict.size = alloc_size; } lz_decoder_reset(next->coder); @@ -280,9 +297,12 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const size_t copy_size = my_min(lz_options.preset_dict_size, lz_options.dict_size); const size_t offset = lz_options.preset_dict_size - copy_size; - memcpy(coder->dict.buf, lz_options.preset_dict + offset, + memcpy(coder->dict.buf + coder->dict.pos, + lz_options.preset_dict + offset, copy_size); - coder->dict.pos = copy_size; + + // dict.pos isn't zero after lz_decoder_reset(). + coder->dict.pos += copy_size; coder->dict.full = copy_size; } diff --git a/contrib/libs/lzma/liblzma/lz/lz_decoder.h b/contrib/libs/lzma/liblzma/lz/lz_decoder.h index ad80d4dd0d..cb61b6e24c 100644 --- a/contrib/libs/lzma/liblzma/lz/lz_decoder.h +++ b/contrib/libs/lzma/liblzma/lz/lz_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_decoder.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZ_DECODER_H @@ -17,10 +16,28 @@ #include "common.h" +/// Maximum length of a match rounded up to a nice power of 2 which is +/// a good size for aligned memcpy(). The allocated dictionary buffer will +/// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size: +/// +/// (1) Every time the decoder reaches the end of the dictionary buffer, +/// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning. +/// This way dict_repeat() will only need to copy from one place, +/// never from both the end and beginning of the buffer. +/// +/// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between +/// the oldest byte still in the dictionary and the current write +/// position. This way dict_repeat(dict, dict->size - 1, &len) +/// won't need memmove() as the copying cannot overlap. +/// +/// Note that memcpy() still cannot be used if distance < len. +/// +/// LZMA's longest match length is 273 so pick a multiple of 16 above that. +#define LZ_DICT_REPEAT_MAX 288 + + typedef struct { - /// Pointer to the dictionary buffer. It can be an allocated buffer - /// internal to liblzma, or it can a be a buffer given by the - /// application when in single-call mode (not implemented yet). + /// Pointer to the dictionary buffer. uint8_t *buf; /// Write position in dictionary. The next byte will be written to @@ -35,9 +52,16 @@ typedef struct { /// Write limit size_t limit; - /// Size of the dictionary + /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes + /// larger than the actual dictionary size. This is enforced by + /// how the value for "full" is set; it can be at most + /// "size - 2 * LZ_DICT_REPEAT_MAX". size_t size; + /// True once the dictionary has become full and the writing position + /// has been wrapped in decode_buffer() in lz_decoder.c. + bool has_wrapped; + /// True when dictionary should be reset before decoding more data. bool need_reset; @@ -103,7 +127,16 @@ static inline uint8_t dict_get(const lzma_dict *const dict, const uint32_t distance) { return dict->buf[dict->pos - distance - 1 - + (distance < dict->pos ? 0 : dict->size)]; + + (distance < dict->pos + ? 0 : dict->size - LZ_DICT_REPEAT_MAX)]; +} + + +/// Optimized version of dict_get(dict, 0) +static inline uint8_t +dict_get0(const lzma_dict *const dict) +{ + return dict->buf[dict->pos - 1]; } @@ -132,68 +165,51 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) uint32_t left = my_min(dict_avail, *len); *len -= left; + size_t back = dict->pos - distance - 1; + if (distance >= dict->pos) + back += dict->size - LZ_DICT_REPEAT_MAX; + // Repeat a block of data from the history. Because memcpy() is faster // than copying byte by byte in a loop, the copying process gets split - // into three cases. + // into two cases. if (distance < left) { // Source and target areas overlap, thus we can't use // memcpy() nor even memmove() safely. do { - dict->buf[dict->pos] = dict_get(dict, distance); - ++dict->pos; + dict->buf[dict->pos++] = dict->buf[back++]; } while (--left > 0); - - } else if (distance < dict->pos) { - // The easiest and fastest case - memcpy(dict->buf + dict->pos, - dict->buf + dict->pos - distance - 1, - left); - dict->pos += left; - } else { - // The bigger the dictionary, the more rare this - // case occurs. We need to "wrap" the dict, thus - // we might need two memcpy() to copy all the data. - assert(dict->full == dict->size); - const uint32_t copy_pos - = dict->pos - distance - 1 + dict->size; - uint32_t copy_size = dict->size - copy_pos; - - if (copy_size < left) { - memmove(dict->buf + dict->pos, dict->buf + copy_pos, - copy_size); - dict->pos += copy_size; - copy_size = left - copy_size; - memcpy(dict->buf + dict->pos, dict->buf, copy_size); - dict->pos += copy_size; - } else { - memmove(dict->buf + dict->pos, dict->buf + copy_pos, - left); - dict->pos += left; - } + memcpy(dict->buf + dict->pos, dict->buf + back, left); + dict->pos += left; } // Update how full the dictionary is. - if (dict->full < dict->pos) - dict->full = dict->pos; + if (!dict->has_wrapped) + dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; - return unlikely(*len != 0); + return *len != 0; +} + + +static inline void +dict_put(lzma_dict *dict, uint8_t byte) +{ + dict->buf[dict->pos++] = byte; + + if (!dict->has_wrapped) + dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; } /// Puts one byte into the dictionary. Returns true if the dictionary was /// already full and the byte couldn't be added. static inline bool -dict_put(lzma_dict *dict, uint8_t byte) +dict_put_safe(lzma_dict *dict, uint8_t byte) { if (unlikely(dict->pos == dict->limit)) return true; - dict->buf[dict->pos++] = byte; - - if (dict->pos > dict->full) - dict->full = dict->pos; - + dict_put(dict, byte); return false; } @@ -217,8 +233,8 @@ dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, *left -= lzma_bufcpy(in, in_pos, in_size, dict->buf, &dict->pos, dict->limit); - if (dict->pos > dict->full) - dict->full = dict->pos; + if (!dict->has_wrapped) + dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; return; } diff --git a/contrib/libs/lzma/liblzma/lz/lz_encoder.c b/contrib/libs/lzma/liblzma/lz/lz_encoder.c index 3f86c13f7b..9e34ab4cd7 100644 --- a/contrib/libs/lzma/liblzma/lz/lz_encoder.c +++ b/contrib/libs/lzma/liblzma/lz/lz_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_encoder.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lz_encoder.h" @@ -196,9 +195,7 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, // For now, the dictionary size is limited to 1.5 GiB. This may grow // in the future if needed, but it needs a little more work than just // changing this check. - if (lz_options->dict_size < LZMA_DICT_SIZE_MIN - || lz_options->dict_size - > (UINT32_C(1) << 30) + (UINT32_C(1) << 29) + if (!IS_ENC_DICT_SIZE_VALID(lz_options->dict_size) || lz_options->nice_len > lz_options->match_len_max) return true; diff --git a/contrib/libs/lzma/liblzma/lz/lz_encoder.h b/contrib/libs/lzma/liblzma/lz/lz_encoder.h index 8d9c6d6e94..eb197c6b6c 100644 --- a/contrib/libs/lzma/liblzma/lz/lz_encoder.h +++ b/contrib/libs/lzma/liblzma/lz/lz_encoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_encoder.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZ_ENCODER_H @@ -17,6 +16,14 @@ #include "common.h" +// For now, the dictionary size is limited to 1.5 GiB. This may grow +// in the future if needed, but it needs a little more work than just +// changing this check. +#define IS_ENC_DICT_SIZE_VALID(size) \ + ((size) >= LZMA_DICT_SIZE_MIN \ + && (size) <= (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) + + /// A table of these is used by the LZ-based encoder to hold /// the length-distance pairs found by the match finder. typedef struct { @@ -221,7 +228,7 @@ typedef struct { // 3. The literals and matches are encoded using e.g. LZMA. // // The bytes that have been ran through the match finder, but not encoded yet, -// are called `read ahead'. +// are called 'read ahead'. /// Get how many bytes the match finder hashes in its initial step. diff --git a/contrib/libs/lzma/liblzma/lz/lz_encoder_hash.h b/contrib/libs/lzma/liblzma/lz/lz_encoder_hash.h index 4d9971ae6a..8ace82b04c 100644 --- a/contrib/libs/lzma/liblzma/lz/lz_encoder_hash.h +++ b/contrib/libs/lzma/liblzma/lz/lz_encoder_hash.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_encoder_hash.h @@ -5,9 +7,6 @@ // // Author: Igor Pavlov // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZ_ENCODER_HASH_H diff --git a/contrib/libs/lzma/liblzma/lz/lz_encoder_mf.c b/contrib/libs/lzma/liblzma/lz/lz_encoder_mf.c index 1fdc2d7949..557c2612f2 100644 --- a/contrib/libs/lzma/liblzma/lz/lz_encoder_mf.c +++ b/contrib/libs/lzma/liblzma/lz/lz_encoder_mf.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_encoder_mf.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lz_encoder.h" diff --git a/contrib/libs/lzma/liblzma/lzma/fastpos.h b/contrib/libs/lzma/liblzma/lzma/fastpos.h index dbeb16f7e3..d3969a753f 100644 --- a/contrib/libs/lzma/liblzma/lzma/fastpos.h +++ b/contrib/libs/lzma/liblzma/lzma/fastpos.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file fastpos.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_FASTPOS_H diff --git a/contrib/libs/lzma/liblzma/lzma/fastpos_table.c b/contrib/libs/lzma/liblzma/lzma/fastpos_table.c index 6a3ceac0e9..4e10e3795e 100644 --- a/contrib/libs/lzma/liblzma/lzma/fastpos_table.c +++ b/contrib/libs/lzma/liblzma/lzma/fastpos_table.c @@ -1,4 +1,6 @@ -/* This file has been automatically generated by fastpos_tablegen.c. */ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by fastpos_tablegen.c. #include "common.h" #include "fastpos.h" diff --git a/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.c b/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.c index 567df490ca..37ab253f5b 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.c +++ b/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma2_decoder.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lzma2_decoder.h" diff --git a/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.h b/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.h index ef2dcbfa76..cdd8b463ab 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.h +++ b/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma2_decoder.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZMA2_DECODER_H diff --git a/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.c b/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.c index 4b6b23118d..e20b75b300 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.c +++ b/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma2_encoder.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lz_encoder.h" @@ -409,6 +408,9 @@ lzma_lzma2_block_size(const void *options) { const lzma_options_lzma *const opt = options; + if (!IS_ENC_DICT_SIZE_VALID(opt->dict_size)) + return UINT64_MAX; + // Use at least 1 MiB to keep compression ratio better. return my_max((uint64_t)(opt->dict_size) * 3, UINT64_C(1) << 20); } diff --git a/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.h b/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.h index 515f183934..29966a66d2 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.h +++ b/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma2_encoder.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZMA2_ENCODER_H diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_common.h b/contrib/libs/lzma/liblzma/lzma/lzma_common.h index 9d040d95bb..c3c587f090 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma_common.h +++ b/contrib/libs/lzma/liblzma/lzma/lzma_common.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_common.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZMA_COMMON_H @@ -84,6 +83,20 @@ typedef enum { ? (state) - 3 \ : (state) - 6)) +/// Like update_literal(state) but when it is already known that +/// is_literal_state(state) is true. +#define update_literal_normal(state) \ + state = ((state) <= STATE_SHORTREP_LIT_LIT \ + ? STATE_LIT_LIT \ + : (state) - 3); + +/// Like update_literal(state) but when it is already known that +/// is_literal_state(state) is false. +#define update_literal_matched(state) \ + state = ((state) <= STATE_LIT_SHORTREP \ + ? (state) - 3 \ + : (state) - 6); + /// Indicate that the latest state was a match. #define update_match(state) \ state = ((state) < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH) @@ -112,31 +125,33 @@ typedef enum { /// /// Match byte is used when the previous LZMA symbol was something else than /// a literal (that is, it was some kind of match). -#define LITERAL_CODER_SIZE 0x300 +#define LITERAL_CODER_SIZE UINT32_C(0x300) /// Maximum number of literal coders #define LITERAL_CODERS_MAX (1 << LZMA_LCLP_MAX) +/// Calculates the literal_mask that literal_subcoder() needs. +#define literal_mask_calc(lc, lp) \ + ((UINT32_C(0x100) << (lp)) - (UINT32_C(0x100) >> (lc))) + /// Locate the literal coder for the next literal byte. The choice depends on /// - the lowest literal_pos_bits bits of the position of the current /// byte; and /// - the highest literal_context_bits bits of the previous byte. -#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \ - ((probs)[(((pos) & (lp_mask)) << (lc)) \ - + ((uint32_t)(prev_byte) >> (8U - (lc)))]) +#define literal_subcoder(probs, lc, literal_mask, pos, prev_byte) \ + ((probs) + UINT32_C(3) * \ + (((((pos) << 8) + (prev_byte)) & (literal_mask)) << (lc))) static inline void -literal_init(probability (*probs)[LITERAL_CODER_SIZE], - uint32_t lc, uint32_t lp) +literal_init(probability *probs, uint32_t lc, uint32_t lp) { assert(lc + lp <= LZMA_LCLP_MAX); - const uint32_t coders = 1U << (lc + lp); + const size_t coders = LITERAL_CODER_SIZE << (lc + lp); - for (uint32_t i = 0; i < coders; ++i) - for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j) - bit_reset(probs[i][j]); + for (size_t i = 0; i < coders; ++i) + bit_reset(probs[i]); return; } diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_decoder.c b/contrib/libs/lzma/liblzma/lzma/lzma_decoder.c index 26c148a95e..0abed02b81 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma_decoder.c +++ b/contrib/libs/lzma/liblzma/lzma/lzma_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_decoder.c @@ -5,9 +7,7 @@ /// // Authors: Igor Pavlov // Lasse Collin -// -// This file has been put into the public domain. -// You can do whatever you want with this file. +// Jia Tan // /////////////////////////////////////////////////////////////////////////////// @@ -22,25 +22,20 @@ # pragma GCC diagnostic ignored "-Wimplicit-fallthrough" #endif +// Minimum number of input bytes to safely decode one LZMA symbol. +// The worst case is that we decode 22 bits using probabilities and 26 +// direct bits. This may decode at maximum 20 bytes of input. +#define LZMA_IN_REQUIRED 20 -#ifdef HAVE_SMALL // Macros for (somewhat) size-optimized code. -#define seq_4(seq) seq - -#define seq_6(seq) seq - -#define seq_8(seq) seq - -#define seq_len(seq) \ - seq ## _CHOICE, \ - seq ## _CHOICE2, \ - seq ## _BITTREE - +// This is used to decode the match length (how many bytes must be repeated +// from the dictionary). This version is used in the Resumable mode and +// does not unroll any loops. #define len_decode(target, ld, pos_state, seq) \ do { \ case seq ## _CHOICE: \ - rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_if_0_safe(ld.choice, seq ## _CHOICE) { \ rc_update_0(ld.choice); \ probs = ld.low[pos_state];\ limit = LEN_LOW_SYMBOLS; \ @@ -48,7 +43,7 @@ case seq ## _CHOICE: \ } else { \ rc_update_1(ld.choice); \ case seq ## _CHOICE2: \ - rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_if_0_safe(ld.choice2, seq ## _CHOICE2) { \ rc_update_0(ld.choice2); \ probs = ld.mid[pos_state]; \ limit = LEN_MID_SYMBOLS; \ @@ -64,98 +59,39 @@ case seq ## _CHOICE2: \ symbol = 1; \ case seq ## _BITTREE: \ do { \ - rc_bit(probs[symbol], , , seq ## _BITTREE); \ + rc_bit_safe(probs[symbol], , , seq ## _BITTREE); \ } while (symbol < limit); \ target += symbol - limit; \ } while (0) -#else // HAVE_SMALL - -// Unrolled versions -#define seq_4(seq) \ - seq ## 0, \ - seq ## 1, \ - seq ## 2, \ - seq ## 3 - -#define seq_6(seq) \ - seq ## 0, \ - seq ## 1, \ - seq ## 2, \ - seq ## 3, \ - seq ## 4, \ - seq ## 5 - -#define seq_8(seq) \ - seq ## 0, \ - seq ## 1, \ - seq ## 2, \ - seq ## 3, \ - seq ## 4, \ - seq ## 5, \ - seq ## 6, \ - seq ## 7 - -#define seq_len(seq) \ - seq ## _CHOICE, \ - seq ## _LOW0, \ - seq ## _LOW1, \ - seq ## _LOW2, \ - seq ## _CHOICE2, \ - seq ## _MID0, \ - seq ## _MID1, \ - seq ## _MID2, \ - seq ## _HIGH0, \ - seq ## _HIGH1, \ - seq ## _HIGH2, \ - seq ## _HIGH3, \ - seq ## _HIGH4, \ - seq ## _HIGH5, \ - seq ## _HIGH6, \ - seq ## _HIGH7 -#define len_decode(target, ld, pos_state, seq) \ +// This is the faster version of the match length decoder that does not +// worry about being resumable. It unrolls the bittree decoding loop. +#define len_decode_fast(target, ld, pos_state) \ do { \ symbol = 1; \ -case seq ## _CHOICE: \ - rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_if_0(ld.choice) { \ rc_update_0(ld.choice); \ - rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \ - rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \ - rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \ - target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \ + rc_bittree3(ld.low[pos_state], \ + -LEN_LOW_SYMBOLS + MATCH_LEN_MIN); \ + target = symbol; \ } else { \ rc_update_1(ld.choice); \ -case seq ## _CHOICE2: \ - rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_if_0(ld.choice2) { \ rc_update_0(ld.choice2); \ - rc_bit_case(ld.mid[pos_state][symbol], , , \ - seq ## _MID0); \ - rc_bit_case(ld.mid[pos_state][symbol], , , \ - seq ## _MID1); \ - rc_bit_case(ld.mid[pos_state][symbol], , , \ - seq ## _MID2); \ - target = symbol - LEN_MID_SYMBOLS \ - + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ + rc_bittree3(ld.mid[pos_state], -LEN_MID_SYMBOLS \ + + MATCH_LEN_MIN + LEN_LOW_SYMBOLS); \ + target = symbol; \ } else { \ rc_update_1(ld.choice2); \ - rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \ - rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \ - rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \ - rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \ - rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \ - rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \ - rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \ - rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \ - target = symbol - LEN_HIGH_SYMBOLS \ + rc_bittree8(ld.high, -LEN_HIGH_SYMBOLS \ + MATCH_LEN_MIN \ - + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ + + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS); \ + target = symbol; \ } \ } \ } while (0) -#endif // HAVE_SMALL - /// Length decoder probabilities; see comments in lzma_common.h. typedef struct { @@ -173,7 +109,7 @@ typedef struct { /////////////////// /// Literals; see comments in lzma_common.h. - probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; + probability literal[LITERAL_CODERS_MAX * LITERAL_CODER_SIZE]; /// If 1, it's a match. Otherwise it's a single 8-bit literal. probability is_match[STATES][POS_STATES_MAX]; @@ -232,7 +168,7 @@ typedef struct { uint32_t pos_mask; // (1U << pb) - 1 uint32_t literal_context_bits; - uint32_t literal_pos_mask; + uint32_t literal_mask; /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of /// payload marker is expected. @@ -251,22 +187,26 @@ typedef struct { enum { SEQ_NORMALIZE, SEQ_IS_MATCH, - seq_8(SEQ_LITERAL), - seq_8(SEQ_LITERAL_MATCHED), + SEQ_LITERAL, + SEQ_LITERAL_MATCHED, SEQ_LITERAL_WRITE, SEQ_IS_REP, - seq_len(SEQ_MATCH_LEN), - seq_6(SEQ_DIST_SLOT), + SEQ_MATCH_LEN_CHOICE, + SEQ_MATCH_LEN_CHOICE2, + SEQ_MATCH_LEN_BITTREE, + SEQ_DIST_SLOT, SEQ_DIST_MODEL, SEQ_DIRECT, - seq_4(SEQ_ALIGN), + SEQ_ALIGN, SEQ_EOPM, SEQ_IS_REP0, SEQ_SHORTREP, SEQ_IS_REP0_LONG, SEQ_IS_REP1, SEQ_IS_REP2, - seq_len(SEQ_REP_LEN), + SEQ_REP_LEN_CHOICE, + SEQ_REP_LEN_CHOICE2, + SEQ_REP_LEN_BITTREE, SEQ_COPY, } sequence; @@ -321,7 +261,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, const size_t dict_start = dict.pos; // Range decoder - rc_to_local(coder->rc, *in_pos); + rc_to_local(coder->rc, *in_pos, LZMA_IN_REQUIRED); // State uint32_t state = coder->state; @@ -340,7 +280,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, uint32_t offset = coder->offset; uint32_t len = coder->len; - const uint32_t literal_pos_mask = coder->literal_pos_mask; + const uint32_t literal_mask = coder->literal_mask; const uint32_t literal_context_bits = coder->literal_context_bits; // Temporary variables @@ -367,8 +307,24 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, might_finish_without_eopm = true; } - // The main decoder loop. The "switch" is used to restart the decoder at - // correct location. Once restarted, the "switch" is no longer used. + // The main decoder loop. The "switch" is used to resume the decoder at + // correct location. Once resumed, the "switch" is no longer used. + // The decoder loops is split into two modes: + // + // 1 - Non-resumable mode (fast). This is used when it is guaranteed + // there is enough input to decode the next symbol. If the output + // limit is reached, then the decoder loop will save the place + // for the resumable mode to continue. This mode is not used if + // HAVE_SMALL is defined. This is faster than Resumable mode + // because it reduces the number of branches needed and allows + // for more compiler optimizations. + // + // 2 - Resumable mode (slow). This is used when a previous decoder + // loop did not have enough space in the input or output buffers + // to complete. It uses sequence enum values to set remind + // coder->sequence where to resume in the decoder loop. This + // is the only mode used when HAVE_SMALL is defined. + switch (coder->sequence) while (true) { // Calculate new pos_state. This is skipped on the first loop @@ -376,13 +332,339 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, // variables. pos_state = dict.pos & pos_mask; +#ifndef HAVE_SMALL + + /////////////////////////////// + // Non-resumable Mode (fast) // + /////////////////////////////// + + // Go to Resumable mode (1) if there is not enough input to + // safely decode any possible LZMA symbol or (2) if the + // dictionary is full, which may need special checks that + // are only done in the Resumable mode. + if (unlikely(!rc_is_fast_allowed() + || dict.pos == dict.limit)) + goto slow; + + // Decode the first bit from the next LZMA symbol. + // If the bit is a 0, then we handle it as a literal. + // If the bit is a 1, then it is a match of previously + // decoded data. + rc_if_0(coder->is_match[state][pos_state]) { + ///////////////////// + // Decode literal. // + ///////////////////// + + // Update the RC that we have decoded a 0. + rc_update_0(coder->is_match[state][pos_state]); + + // Get the correct probability array from lp and + // lc params. + probs = literal_subcoder(coder->literal, + literal_context_bits, literal_mask, + dict.pos, dict_get0(&dict)); + + if (is_literal_state(state)) { + update_literal_normal(state); + + // Decode literal without match byte. + rc_bittree8(probs, 0); + } else { + update_literal_matched(state); + + // Decode literal with match byte. + rc_matched_literal(probs, + dict_get(&dict, rep0)); + } + + // Write decoded literal to dictionary + dict_put(&dict, symbol); + continue; + } + + /////////////////// + // Decode match. // + /////////////////// + + // Instead of a new byte we are going to decode a + // distance-length pair. The distance represents how far + // back in the dictionary to begin copying. The length + // represents how many bytes to copy. + + rc_update_1(coder->is_match[state][pos_state]); + + rc_if_0(coder->is_rep[state]) { + /////////////////// + // Simple match. // + /////////////////// + + // Not a repeated match. In this case, + // the length (how many bytes to copy) must be + // decoded first. Then, the distance (where to + // start copying) is decoded. + // + // This is also how we know when we are done + // decoding. If the distance decodes to UINT32_MAX, + // then we know to stop decoding (end of payload + // marker). + + rc_update_0(coder->is_rep[state]); + update_match(state); + + // The latest three match distances are kept in + // memory in case there are repeated matches. + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + + // Decode the length of the match. + len_decode_fast(len, coder->match_len_decoder, + pos_state); + + // Next, decode the distance into rep0. + + // The next 6 bits determine how to decode the + // rest of the distance. + probs = coder->dist_slot[get_dist_state(len)]; + + rc_bittree6(probs, -DIST_SLOTS); + assert(symbol <= 63); + + if (symbol < DIST_MODEL_START) { + // If the decoded symbol is < DIST_MODEL_START + // then we use its value directly as the + // match distance. No other bits are needed. + // The only possible distance values + // are [0, 3]. + rep0 = symbol; + } else { + // Use the first two bits of symbol as the + // highest bits of the match distance. + + // "limit" represents the number of low bits + // to decode. + limit = (symbol >> 1) - 1; + assert(limit >= 1 && limit <= 30); + rep0 = 2 + (symbol & 1); + + if (symbol < DIST_MODEL_END) { + // When symbol is > DIST_MODEL_START, + // but symbol < DIST_MODEL_END, then + // it can decode distances between + // [4, 127]. + assert(limit <= 5); + rep0 <<= limit; + assert(rep0 <= 96); + + // -1 is fine, because we start + // decoding at probs[1], not probs[0]. + // NOTE: This violates the C standard, + // since we are doing pointer + // arithmetic past the beginning of + // the array. + assert((int32_t)(rep0 - symbol - 1) + >= -1); + assert((int32_t)(rep0 - symbol - 1) + <= 82); + probs = coder->pos_special + rep0 + - symbol - 1; + symbol = 1; + offset = 1; + + // Variable number (1-5) of bits + // from a reverse bittree. This + // isn't worth manual unrolling. + // + // NOTE: Making one or many of the + // variables (probs, symbol, offset, + // or limit) local here (instead of + // using those declared outside the + // main loop) can affect code size + // and performance which isn't a + // surprise but it's not so clear + // what is the best. + do { + rc_bit_add_if_1(probs, + rep0, offset); + offset <<= 1; + } while (--limit > 0); + } else { + // The distance is >= 128. Decode the + // lower bits without probabilities + // except the lowest four bits. + assert(symbol >= 14); + assert(limit >= 6); + + limit -= ALIGN_BITS; + assert(limit >= 2); + + rc_direct(rep0, limit); + + // Decode the lowest four bits using + // probabilities. + rep0 <<= ALIGN_BITS; + rc_bittree_rev4(coder->pos_align); + rep0 += symbol; + + // If the end of payload marker (EOPM) + // is detected, jump to the safe code. + // The EOPM handling isn't speed + // critical at all. + // + // A final normalization is needed + // after the EOPM (there can be a + // dummy byte to read in some cases). + // If the normalization was done here + // in the fast code, it would need to + // be taken into account in the value + // of LZMA_IN_REQUIRED. Using the + // safe code allows keeping + // LZMA_IN_REQUIRED as 20 instead of + // 21. + if (rep0 == UINT32_MAX) + goto eopm; + } + } + + // Validate the distance we just decoded. + if (unlikely(!dict_is_distance_valid(&dict, rep0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + } else { + rc_update_1(coder->is_rep[state]); + + ///////////////////// + // Repeated match. // + ///////////////////// + + // The match distance is a value that we have decoded + // recently. The latest four match distances are + // available as rep0, rep1, rep2 and rep3. We will + // now decode which of them is the new distance. + // + // There cannot be a match if we haven't produced + // any output, so check that first. + if (unlikely(!dict_is_distance_valid(&dict, 0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + rc_if_0(coder->is_rep0[state]) { + rc_update_0(coder->is_rep0[state]); + // The distance is rep0. + + // Decode the next bit to determine if 1 byte + // should be copied from rep0 distance or + // if the number of bytes needs to be decoded. + + // If the next bit is 0, then it is a + // "Short Rep Match" and only 1 bit is copied. + // Otherwise, the length of the match is + // decoded after the "else" statement. + rc_if_0(coder->is_rep0_long[state][pos_state]) { + rc_update_0(coder->is_rep0_long[ + state][pos_state]); + + update_short_rep(state); + dict_put(&dict, dict_get(&dict, rep0)); + continue; + } + + // Repeating more than one byte at + // distance of rep0. + rc_update_1(coder->is_rep0_long[ + state][pos_state]); + + } else { + rc_update_1(coder->is_rep0[state]); + + // The distance is rep1, rep2 or rep3. Once + // we find out which one of these three, it + // is stored to rep0 and rep1, rep2 and rep3 + // are updated accordingly. There is no + // "Short Rep Match" option, so the length + // of the match must always be decoded next. + rc_if_0(coder->is_rep1[state]) { + // The distance is rep1. + rc_update_0(coder->is_rep1[state]); + + const uint32_t distance = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + rc_update_1(coder->is_rep1[state]); + + rc_if_0(coder->is_rep2[state]) { + // The distance is rep2. + rc_update_0(coder->is_rep2[ + state]); + + const uint32_t distance = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + // The distance is rep3. + rc_update_1(coder->is_rep2[ + state]); + + const uint32_t distance = rep3; + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + } + } + } + + update_long_rep(state); + + // Decode the length of the repeated match. + len_decode_fast(len, coder->rep_len_decoder, + pos_state); + } + + ///////////////////////////////// + // Repeat from history buffer. // + ///////////////////////////////// + + // The length is always between these limits. There is no way + // to trigger the algorithm to set len outside this range. + assert(len >= MATCH_LEN_MIN); + assert(len <= MATCH_LEN_MAX); + + // Repeat len bytes from distance of rep0. + if (unlikely(dict_repeat(&dict, rep0, &len))) { + coder->sequence = SEQ_COPY; + goto out; + } + + continue; + +slow: +#endif + /////////////////////////// + // Resumable Mode (slow) // + /////////////////////////// + + // This is very similar to Non-resumable Mode, so most of the + // comments are not repeated. The main differences are: + // - case labels are used to resume at the correct location. + // - Loops are not unrolled. + // - Range coder macros take an extra sequence argument + // so they can save to coder->sequence the location to + // resume in case there is not enough input. case SEQ_NORMALIZE: case SEQ_IS_MATCH: if (unlikely(might_finish_without_eopm && dict.pos == dict.limit)) { // In rare cases there is a useless byte that needs // to be read anyway. - rc_normalize(SEQ_NORMALIZE); + rc_normalize_safe(SEQ_NORMALIZE); // If the range decoder state is such that we can // be at the end of the LZMA stream, then the @@ -405,49 +687,37 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, eopm_is_valid = true; } - rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) { - rc_update_0(coder->is_match[state][pos_state]); + rc_if_0_safe(coder->is_match[state][pos_state], SEQ_IS_MATCH) { + ///////////////////// + // Decode literal. // + ///////////////////// - // It's a literal i.e. a single 8-bit byte. + rc_update_0(coder->is_match[state][pos_state]); probs = literal_subcoder(coder->literal, - literal_context_bits, literal_pos_mask, - dict.pos, dict_get(&dict, 0)); + literal_context_bits, literal_mask, + dict.pos, dict_get0(&dict)); symbol = 1; if (is_literal_state(state)) { + update_literal_normal(state); + // Decode literal without match byte. -#ifdef HAVE_SMALL + // The "slow" version does not unroll + // the loop. case SEQ_LITERAL: do { - rc_bit(probs[symbol], , , SEQ_LITERAL); + rc_bit_safe(probs[symbol], , , + SEQ_LITERAL); } while (symbol < (1 << 8)); -#else - rc_bit_case(probs[symbol], , , SEQ_LITERAL0); - rc_bit_case(probs[symbol], , , SEQ_LITERAL1); - rc_bit_case(probs[symbol], , , SEQ_LITERAL2); - rc_bit_case(probs[symbol], , , SEQ_LITERAL3); - rc_bit_case(probs[symbol], , , SEQ_LITERAL4); - rc_bit_case(probs[symbol], , , SEQ_LITERAL5); - rc_bit_case(probs[symbol], , , SEQ_LITERAL6); - rc_bit_case(probs[symbol], , , SEQ_LITERAL7); -#endif } else { + update_literal_matched(state); + // Decode literal with match byte. - // - // We store the byte we compare against - // ("match byte") to "len" to minimize the - // number of variables we need to store - // between decoder calls. len = (uint32_t)(dict_get(&dict, rep0)) << 1; - // The usage of "offset" allows omitting some - // branches, which should give tiny speed - // improvement on some CPUs. "offset" gets - // set to zero if match_bit didn't match. offset = 0x100; -#ifdef HAVE_SMALL case SEQ_LITERAL_MATCHED: do { const uint32_t match_bit @@ -456,7 +726,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, = offset + match_bit + symbol; - rc_bit(probs[subcoder_index], + rc_bit_safe(probs[subcoder_index], offset &= ~match_bit, offset &= match_bit, SEQ_LITERAL_MATCHED); @@ -469,61 +739,10 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, len <<= 1; } while (symbol < (1 << 8)); -#else - // Unroll the loop. - uint32_t match_bit; - uint32_t subcoder_index; - -# define d(seq) \ - case seq: \ - match_bit = len & offset; \ - subcoder_index = offset + match_bit + symbol; \ - rc_bit(probs[subcoder_index], \ - offset &= ~match_bit, \ - offset &= match_bit, \ - seq) - - d(SEQ_LITERAL_MATCHED0); - len <<= 1; - d(SEQ_LITERAL_MATCHED1); - len <<= 1; - d(SEQ_LITERAL_MATCHED2); - len <<= 1; - d(SEQ_LITERAL_MATCHED3); - len <<= 1; - d(SEQ_LITERAL_MATCHED4); - len <<= 1; - d(SEQ_LITERAL_MATCHED5); - len <<= 1; - d(SEQ_LITERAL_MATCHED6); - len <<= 1; - d(SEQ_LITERAL_MATCHED7); -# undef d -#endif } - //update_literal(state); - // Use a lookup table to update to literal state, - // since compared to other state updates, this would - // need two branches. - static const lzma_lzma_state next_state[] = { - STATE_LIT_LIT, - STATE_LIT_LIT, - STATE_LIT_LIT, - STATE_LIT_LIT, - STATE_MATCH_LIT_LIT, - STATE_REP_LIT_LIT, - STATE_SHORTREP_LIT_LIT, - STATE_MATCH_LIT, - STATE_REP_LIT, - STATE_SHORTREP_LIT, - STATE_MATCH_LIT, - STATE_REP_LIT - }; - state = next_state[state]; - case SEQ_LITERAL_WRITE: - if (unlikely(dict_put(&dict, symbol))) { + if (dict_put_safe(&dict, symbol)) { coder->sequence = SEQ_LITERAL_WRITE; goto out; } @@ -531,64 +750,47 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, continue; } - // Instead of a new byte we are going to get a byte range - // (distance and length) which will be repeated from our - // output history. + /////////////////// + // Decode match. // + /////////////////// rc_update_1(coder->is_match[state][pos_state]); case SEQ_IS_REP: - rc_if_0(coder->is_rep[state], SEQ_IS_REP) { - // Not a repeated match + rc_if_0_safe(coder->is_rep[state], SEQ_IS_REP) { + /////////////////// + // Simple match. // + /////////////////// + rc_update_0(coder->is_rep[state]); update_match(state); - // The latest three match distances are kept in - // memory in case there are repeated matches. rep3 = rep2; rep2 = rep1; rep1 = rep0; - // Decode the length of the match. len_decode(len, coder->match_len_decoder, pos_state, SEQ_MATCH_LEN); - // Prepare to decode the highest two bits of the - // match distance. probs = coder->dist_slot[get_dist_state(len)]; symbol = 1; -#ifdef HAVE_SMALL case SEQ_DIST_SLOT: do { - rc_bit(probs[symbol], , , SEQ_DIST_SLOT); + rc_bit_safe(probs[symbol], , , SEQ_DIST_SLOT); } while (symbol < DIST_SLOTS); -#else - rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0); - rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1); - rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2); - rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3); - rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4); - rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5); -#endif - // Get rid of the highest bit that was needed for - // indexing of the probability array. + symbol -= DIST_SLOTS; assert(symbol <= 63); if (symbol < DIST_MODEL_START) { - // Match distances [0, 3] have only two bits. rep0 = symbol; } else { - // Decode the lowest [1, 29] bits of - // the match distance. limit = (symbol >> 1) - 1; assert(limit >= 1 && limit <= 30); rep0 = 2 + (symbol & 1); if (symbol < DIST_MODEL_END) { - // Prepare to decode the low bits for - // a distance of [4, 127]. assert(limit <= 5); rep0 <<= limit; assert(rep0 <= 96); @@ -607,95 +809,36 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, symbol = 1; offset = 0; case SEQ_DIST_MODEL: -#ifdef HAVE_SMALL do { - rc_bit(probs[symbol], , + rc_bit_safe(probs[symbol], , rep0 += 1U << offset, SEQ_DIST_MODEL); } while (++offset < limit); -#else - switch (limit) { - case 5: - assert(offset == 0); - rc_bit(probs[symbol], , - rep0 += 1U, - SEQ_DIST_MODEL); - ++offset; - --limit; - case 4: - rc_bit(probs[symbol], , - rep0 += 1U << offset, - SEQ_DIST_MODEL); - ++offset; - --limit; - case 3: - rc_bit(probs[symbol], , - rep0 += 1U << offset, - SEQ_DIST_MODEL); - ++offset; - --limit; - case 2: - rc_bit(probs[symbol], , - rep0 += 1U << offset, - SEQ_DIST_MODEL); - ++offset; - --limit; - case 1: - // We need "symbol" only for - // indexing the probability - // array, thus we can use - // rc_bit_last() here to omit - // the unneeded updating of - // "symbol". - rc_bit_last(probs[symbol], , - rep0 += 1U << offset, - SEQ_DIST_MODEL); - } -#endif } else { - // The distance is >= 128. Decode the - // lower bits without probabilities - // except the lowest four bits. assert(symbol >= 14); assert(limit >= 6); limit -= ALIGN_BITS; assert(limit >= 2); case SEQ_DIRECT: - // Not worth manual unrolling - do { - rc_direct(rep0, SEQ_DIRECT); - } while (--limit > 0); + rc_direct_safe(rep0, limit, + SEQ_DIRECT); - // Decode the lowest four bits using - // probabilities. rep0 <<= ALIGN_BITS; - symbol = 1; -#ifdef HAVE_SMALL - offset = 0; + symbol = 0; + offset = 1; case SEQ_ALIGN: do { - rc_bit(coder->pos_align[ - symbol], , - rep0 += 1U << offset, + rc_bit_last_safe( + coder->pos_align[ + offset + + symbol], + , + symbol += offset, SEQ_ALIGN); - } while (++offset < ALIGN_BITS); -#else - case SEQ_ALIGN0: - rc_bit(coder->pos_align[symbol], , - rep0 += 1, SEQ_ALIGN0); - case SEQ_ALIGN1: - rc_bit(coder->pos_align[symbol], , - rep0 += 2, SEQ_ALIGN1); - case SEQ_ALIGN2: - rc_bit(coder->pos_align[symbol], , - rep0 += 4, SEQ_ALIGN2); - case SEQ_ALIGN3: - // Like in SEQ_DIST_MODEL, we don't - // need "symbol" for anything else - // than indexing the probability array. - rc_bit_last(coder->pos_align[symbol], , - rep0 += 8, SEQ_ALIGN3); -#endif + offset <<= 1; + } while (offset < ALIGN_SIZE); + + rep0 += symbol; if (rep0 == UINT32_MAX) { // End of payload marker was @@ -710,6 +853,9 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, // that EOPM might be used // (it's not allowed in // LZMA2). +#ifndef HAVE_SMALL +eopm: +#endif if (!eopm_is_valid) { ret = LZMA_DATA_ERROR; goto out; @@ -718,7 +864,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, case SEQ_EOPM: // LZMA1 stream with // end-of-payload marker. - rc_normalize(SEQ_EOPM); + rc_normalize_safe(SEQ_EOPM); ret = rc_is_finished(rc) ? LZMA_STREAM_END : LZMA_DATA_ERROR; @@ -727,36 +873,30 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, } } - // Validate the distance we just decoded. if (unlikely(!dict_is_distance_valid(&dict, rep0))) { ret = LZMA_DATA_ERROR; goto out; } } else { + ///////////////////// + // Repeated match. // + ///////////////////// + rc_update_1(coder->is_rep[state]); - // Repeated match - // - // The match distance is a value that we have had - // earlier. The latest four match distances are - // available as rep0, rep1, rep2 and rep3. We will - // now decode which of them is the new distance. - // - // There cannot be a match if we haven't produced - // any output, so check that first. if (unlikely(!dict_is_distance_valid(&dict, 0))) { ret = LZMA_DATA_ERROR; goto out; } case SEQ_IS_REP0: - rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) { + rc_if_0_safe(coder->is_rep0[state], SEQ_IS_REP0) { rc_update_0(coder->is_rep0[state]); - // The distance is rep0. case SEQ_IS_REP0_LONG: - rc_if_0(coder->is_rep0_long[state][pos_state], + rc_if_0_safe(coder->is_rep0_long + [state][pos_state], SEQ_IS_REP0_LONG) { rc_update_0(coder->is_rep0_long[ state][pos_state]); @@ -764,8 +904,9 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, update_short_rep(state); case SEQ_SHORTREP: - if (unlikely(dict_put(&dict, dict_get( - &dict, rep0)))) { + if (dict_put_safe(&dict, + dict_get(&dict, + rep0))) { coder->sequence = SEQ_SHORTREP; goto out; } @@ -773,8 +914,6 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, continue; } - // Repeating more than one byte at - // distance of rep0. rc_update_1(coder->is_rep0_long[ state][pos_state]); @@ -782,11 +921,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, rc_update_1(coder->is_rep0[state]); case SEQ_IS_REP1: - // The distance is rep1, rep2 or rep3. Once - // we find out which one of these three, it - // is stored to rep0 and rep1, rep2 and rep3 - // are updated accordingly. - rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) { + rc_if_0_safe(coder->is_rep1[state], SEQ_IS_REP1) { rc_update_0(coder->is_rep1[state]); const uint32_t distance = rep1; @@ -796,7 +931,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, } else { rc_update_1(coder->is_rep1[state]); case SEQ_IS_REP2: - rc_if_0(coder->is_rep2[state], + rc_if_0_safe(coder->is_rep2[state], SEQ_IS_REP2) { rc_update_0(coder->is_rep2[ state]); @@ -821,7 +956,6 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, update_long_rep(state); - // Decode the length of the repeated match. len_decode(len, coder->rep_len_decoder, pos_state, SEQ_REP_LEN); } @@ -830,13 +964,10 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, // Repeat from history buffer. // ///////////////////////////////// - // The length is always between these limits. There is no way - // to trigger the algorithm to set len outside this range. assert(len >= MATCH_LEN_MIN); assert(len <= MATCH_LEN_MAX); case SEQ_COPY: - // Repeat len bytes from distance of rep0. if (unlikely(dict_repeat(&dict, rep0, &len))) { coder->sequence = SEQ_COPY; goto out; @@ -890,7 +1021,6 @@ out: } - static void lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size, bool allow_eopm) @@ -917,7 +1047,7 @@ lzma_decoder_reset(void *coder_ptr, const void *opt) literal_init(coder->literal, options->lc, options->lp); coder->literal_context_bits = options->lc; - coder->literal_pos_mask = (1U << options->lp) - 1; + coder->literal_mask = literal_mask_calc(options->lc, options->lp); // State coder->state = STATE_LIT_LIT; diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_decoder.h b/contrib/libs/lzma/liblzma/lzma/lzma_decoder.h index 1427bc2461..9730f56fc2 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma_decoder.h +++ b/contrib/libs/lzma/liblzma/lzma/lzma_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_decoder.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZMA_DECODER_H diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_encoder.c b/contrib/libs/lzma/liblzma/lzma/lzma_encoder.c index 79fdeca290..543ca321c3 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma_encoder.c +++ b/contrib/libs/lzma/liblzma/lzma/lzma_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_encoder.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lzma2_encoder.h" @@ -49,24 +48,24 @@ literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position) const uint8_t cur_byte = mf->buffer[ mf->read_pos - mf->read_ahead]; probability *subcoder = literal_subcoder(coder->literal, - coder->literal_context_bits, coder->literal_pos_mask, + coder->literal_context_bits, coder->literal_mask, position, mf->buffer[mf->read_pos - mf->read_ahead - 1]); if (is_literal_state(coder->state)) { // Previous LZMA-symbol was a literal. Encode a normal // literal without a match byte. + update_literal_normal(coder->state); rc_bittree(&coder->rc, subcoder, 8, cur_byte); } else { // Previous LZMA-symbol was a match. Use the last byte of // the match as a "match byte". That is, compare the bits // of the current literal and the match byte. + update_literal_matched(coder->state); const uint8_t match_byte = mf->buffer[ mf->read_pos - coder->reps[0] - 1 - mf->read_ahead]; literal_matched(&coder->rc, subcoder, match_byte, cur_byte); } - - update_literal(coder->state); } @@ -283,7 +282,7 @@ encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf) mf_skip(mf, 1); mf->read_ahead = 0; rc_bit(&coder->rc, &coder->is_match[0][0], 0); - rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); + rc_bittree(&coder->rc, coder->literal + 0, 8, mf->buffer[0]); ++coder->uncomp_size; } @@ -535,7 +534,7 @@ lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder, coder->pos_mask = (1U << options->pb) - 1; coder->literal_context_bits = options->lc; - coder->literal_pos_mask = (1U << options->lp) - 1; + coder->literal_mask = literal_mask_calc(options->lc, options->lp); // Range coder rc_reset(&coder->rc); diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_encoder.h b/contrib/libs/lzma/liblzma/lzma/lzma_encoder.h index 84d8c9163f..e8ae807930 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma_encoder.h +++ b/contrib/libs/lzma/liblzma/lzma/lzma_encoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_encoder.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZMA_ENCODER_H diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_encoder_optimum_fast.c b/contrib/libs/lzma/liblzma/lzma/lzma_encoder_optimum_fast.c index 6c53d2bd00..0f063d5be7 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma_encoder_optimum_fast.c +++ b/contrib/libs/lzma/liblzma/lzma/lzma_encoder_optimum_fast.c @@ -1,12 +1,11 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_encoder_optimum_fast.c // // Author: Igor Pavlov // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lzma_encoder_private.h" diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_encoder_optimum_normal.c b/contrib/libs/lzma/liblzma/lzma/lzma_encoder_optimum_normal.c index 101c8d4790..a6c0398f3a 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma_encoder_optimum_normal.c +++ b/contrib/libs/lzma/liblzma/lzma/lzma_encoder_optimum_normal.c @@ -1,12 +1,11 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_encoder_optimum_normal.c // // Author: Igor Pavlov // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lzma_encoder_private.h" @@ -24,7 +23,7 @@ get_literal_price(const lzma_lzma1_encoder *const coder, const uint32_t pos, uint32_t match_byte, uint32_t symbol) { const probability *const subcoder = literal_subcoder(coder->literal, - coder->literal_context_bits, coder->literal_pos_mask, + coder->literal_context_bits, coder->literal_mask, pos, prev_byte); uint32_t price = 0; diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_encoder_presets.c b/contrib/libs/lzma/liblzma/lzma/lzma_encoder_presets.c index 711df02552..e53483f995 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma_encoder_presets.c +++ b/contrib/libs/lzma/liblzma/lzma/lzma_encoder_presets.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_encoder_presets.c @@ -6,9 +8,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "common.h" diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_encoder_private.h b/contrib/libs/lzma/liblzma/lzma/lzma_encoder_private.h index b228c57761..eeea5e9c12 100644 --- a/contrib/libs/lzma/liblzma/lzma/lzma_encoder_private.h +++ b/contrib/libs/lzma/liblzma/lzma/lzma_encoder_private.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_encoder_private.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZMA_ENCODER_PRIVATE_H @@ -116,10 +115,10 @@ struct lzma_lzma1_encoder_s { uint32_t pos_mask; ///< (1 << pos_bits) - 1 uint32_t literal_context_bits; - uint32_t literal_pos_mask; + uint32_t literal_mask; // These are the same as in lzma_decoder.c. See comments there. - probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; + probability literal[LITERAL_CODERS_MAX * LITERAL_CODER_SIZE]; probability is_match[STATES][POS_STATES_MAX]; probability is_rep[STATES]; probability is_rep0[STATES]; diff --git a/contrib/libs/lzma/liblzma/rangecoder/price.h b/contrib/libs/lzma/liblzma/rangecoder/price.h index 45dbbbb20c..cce6bdae5f 100644 --- a/contrib/libs/lzma/liblzma/rangecoder/price.h +++ b/contrib/libs/lzma/liblzma/rangecoder/price.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file price.h @@ -5,9 +7,6 @@ // // Author: Igor Pavlov // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_PRICE_H diff --git a/contrib/libs/lzma/liblzma/rangecoder/price_table.c b/contrib/libs/lzma/liblzma/rangecoder/price_table.c index ac64bf62c7..c33433f718 100644 --- a/contrib/libs/lzma/liblzma/rangecoder/price_table.c +++ b/contrib/libs/lzma/liblzma/rangecoder/price_table.c @@ -1,4 +1,6 @@ -/* This file has been automatically generated by price_tablegen.c. */ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by price_tablegen.c. #include "range_encoder.h" diff --git a/contrib/libs/lzma/liblzma/rangecoder/range_common.h b/contrib/libs/lzma/liblzma/rangecoder/range_common.h index 2c74dc1537..ac4dbe196f 100644 --- a/contrib/libs/lzma/liblzma/rangecoder/range_common.h +++ b/contrib/libs/lzma/liblzma/rangecoder/range_common.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file range_common.h @@ -6,15 +8,15 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_RANGE_COMMON_H #define LZMA_RANGE_COMMON_H -#include "common.h" +// Skip common.h if building price_tablegen.c. +#ifndef BUILDING_PRICE_TABLEGEN +# include "common.h" +#endif /////////////// @@ -66,6 +68,10 @@ /// /// I will be sticking to uint16_t unless some specific architectures /// are *much* faster (20-50 %) with uint32_t. +/// +/// Update in 2024: The branchless C and x86-64 assembly was written so that +/// probability is assumed to be uint16_t. (In contrast, LZMA SDK 23.01 +/// assembly supports both types.) typedef uint16_t probability; #endif diff --git a/contrib/libs/lzma/liblzma/rangecoder/range_decoder.h b/contrib/libs/lzma/liblzma/rangecoder/range_decoder.h index e0b051fac2..31a58d1ffb 100644 --- a/contrib/libs/lzma/liblzma/rangecoder/range_decoder.h +++ b/contrib/libs/lzma/liblzma/rangecoder/range_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file range_decoder.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_RANGE_DECODER_H @@ -17,6 +16,55 @@ #include "range_common.h" +// Choose the range decoder variants to use using a bitmask. +// If no bits are set, only the basic version is used. +// If more than one version is selected for the same feature, +// the last one on the list below is used. +// +// Bitwise-or of the following enable branchless C versions: +// 0x01 normal bittrees +// 0x02 fixed-sized reverse bittrees +// 0x04 variable-sized reverse bittrees (not faster) +// 0x08 matched literal (not faster) +// +// GCC & Clang compatible x86-64 inline assembly: +// 0x010 normal bittrees +// 0x020 fixed-sized reverse bittrees +// 0x040 variable-sized reverse bittrees +// 0x080 matched literal +// 0x100 direct bits +// +// The default can be overridden at build time by defining +// LZMA_RANGE_DECODER_CONFIG to the desired mask. +// +// 2024-02-22: Feedback from benchmarks: +// - Brancless C (0x003) can be better than basic on x86-64 but often it's +// slightly worse on other archs. Since asm is much better on x86-64, +// branchless C is not used at all. +// - With x86-64 asm, there are slight differences between GCC and Clang +// and different processors. Overall 0x1F0 seems to be the best choice. +#ifndef LZMA_RANGE_DECODER_CONFIG +# if defined(__x86_64__) && !defined(__ILP32__) \ + && !defined(__NVCOMPILER) \ + && (defined(__GNUC__) || defined(__clang__)) +# define LZMA_RANGE_DECODER_CONFIG 0x1F0 +# else +# define LZMA_RANGE_DECODER_CONFIG 0 +# endif +#endif + + +// Negative RC_BIT_MODEL_TOTAL but the lowest RC_MOVE_BITS are flipped. +// This is useful for updating probability variables in branchless decoding: +// +// uint32_t decoded_bit = ...; +// probability tmp = RC_BIT_MODEL_OFFSET; +// tmp &= decoded_bit - 1; +// prob -= (prob + tmp) >> RC_MOVE_BITS; +#define RC_BIT_MODEL_OFFSET \ + ((UINT32_C(1) << RC_MOVE_BITS) - 1 - RC_BIT_MODEL_TOTAL) + + typedef struct { uint32_t range; uint32_t code; @@ -50,18 +98,28 @@ rc_read_init(lzma_range_decoder *rc, const uint8_t *restrict in, /// Makes local copies of range decoder and *in_pos variables. Doing this /// improves speed significantly. The range decoder macros expect also -/// variables `in' and `in_size' to be defined. -#define rc_to_local(range_decoder, in_pos) \ +/// variables 'in' and 'in_size' to be defined. +#define rc_to_local(range_decoder, in_pos, fast_mode_in_required) \ lzma_range_decoder rc = range_decoder; \ - size_t rc_in_pos = (in_pos); \ + const uint8_t *rc_in_ptr = in + (in_pos); \ + const uint8_t *rc_in_end = in + in_size; \ + const uint8_t *rc_in_fast_end \ + = (rc_in_end - rc_in_ptr) <= (fast_mode_in_required) \ + ? rc_in_ptr \ + : rc_in_end - (fast_mode_in_required); \ + (void)rc_in_fast_end; /* Silence a warning with HAVE_SMALL. */ \ uint32_t rc_bound +/// Evaluates to true if there is enough input remaining to use fast mode. +#define rc_is_fast_allowed() (rc_in_ptr < rc_in_fast_end) + + /// Stores the local copes back to the range decoder structure. #define rc_from_local(range_decoder, in_pos) \ do { \ range_decoder = rc; \ - in_pos = rc_in_pos; \ + in_pos = (size_t)(rc_in_ptr - in); \ } while (0) @@ -81,18 +139,30 @@ do { \ ((range_decoder).code == 0) -/// Read the next input byte if needed. If more input is needed but there is +// Read the next input byte if needed. +#define rc_normalize() \ +do { \ + if (rc.range < RC_TOP_VALUE) { \ + rc.range <<= RC_SHIFT_BITS; \ + rc.code = (rc.code << RC_SHIFT_BITS) | *rc_in_ptr++; \ + } \ +} while (0) + + +/// If more input is needed but there is /// no more input available, "goto out" is used to jump out of the main -/// decoder loop. -#define rc_normalize(seq) \ +/// decoder loop. The "_safe" macros are used in the Resumable decoder +/// mode in order to save the sequence to continue decoding from that +/// point later. +#define rc_normalize_safe(seq) \ do { \ if (rc.range < RC_TOP_VALUE) { \ - if (unlikely(rc_in_pos == in_size)) { \ + if (rc_in_ptr == rc_in_end) { \ coder->sequence = seq; \ goto out; \ } \ rc.range <<= RC_SHIFT_BITS; \ - rc.code = (rc.code << RC_SHIFT_BITS) | in[rc_in_pos++]; \ + rc.code = (rc.code << RC_SHIFT_BITS) | *rc_in_ptr++; \ } \ } while (0) @@ -100,7 +170,7 @@ do { \ /// Start decoding a bit. This must be used together with rc_update_0() /// and rc_update_1(): /// -/// rc_if_0(prob, seq) { +/// rc_if_0(prob) { /// rc_update_0(prob); /// // Do something /// } else { @@ -108,18 +178,28 @@ do { \ /// // Do something else /// } /// -#define rc_if_0(prob, seq) \ - rc_normalize(seq); \ +#define rc_if_0(prob) \ + rc_normalize(); \ + rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \ + if (rc.code < rc_bound) + + +#define rc_if_0_safe(prob, seq) \ + rc_normalize_safe(seq); \ rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \ if (rc.code < rc_bound) /// Update the range decoder state and the used probability variable to /// match a decoded bit of 0. +/// +/// The x86-64 assembly uses the commented method but it seems that, +/// at least on x86-64, the first version is slightly faster as C code. #define rc_update_0(prob) \ do { \ rc.range = rc_bound; \ prob += (RC_BIT_MODEL_TOTAL - (prob)) >> RC_MOVE_BITS; \ + /* prob -= ((prob) + RC_BIT_MODEL_OFFSET) >> RC_MOVE_BITS; */ \ } while (0) @@ -137,9 +217,21 @@ do { \ /// This macro is used as the last step in bittree reverse decoders since /// those don't use "symbol" for anything else than indexing the probability /// arrays. -#define rc_bit_last(prob, action0, action1, seq) \ +#define rc_bit_last(prob, action0, action1) \ +do { \ + rc_if_0(prob) { \ + rc_update_0(prob); \ + action0; \ + } else { \ + rc_update_1(prob); \ + action1; \ + } \ +} while (0) + + +#define rc_bit_last_safe(prob, action0, action1, seq) \ do { \ - rc_if_0(prob, seq) { \ + rc_if_0_safe(prob, seq) { \ rc_update_0(prob); \ action0; \ } else { \ @@ -151,35 +243,724 @@ do { \ /// Decodes one bit, updates "symbol", and runs action0 or action1 depending /// on the decoded bit. -#define rc_bit(prob, action0, action1, seq) \ +#define rc_bit(prob, action0, action1) \ rc_bit_last(prob, \ symbol <<= 1; action0, \ + symbol = (symbol << 1) + 1; action1); + + +#define rc_bit_safe(prob, action0, action1, seq) \ + rc_bit_last_safe(prob, \ + symbol <<= 1; action0, \ symbol = (symbol << 1) + 1; action1, \ seq); +// Unroll fixed-sized bittree decoding. +// +// A compile-time constant in final_add can be used to get rid of the high bit +// from symbol that is used for the array indexing (1U << bittree_bits). +// final_add may also be used to add offset to the result (LZMA length +// decoder does that). +// +// The reason to have final_add here is that in the asm code the addition +// can be done for free: in x86-64 there is SBB instruction with -1 as +// the immediate value, and final_add is combined with that value. +#define rc_bittree_bit(prob) \ + rc_bit(prob, , ) + +#define rc_bittree3(probs, final_add) \ +do { \ + symbol = 1; \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + symbol += (uint32_t)(final_add); \ +} while (0) -/// Like rc_bit() but add "case seq:" as a prefix. This makes the unrolled -/// loops more readable because the code isn't littered with "case" -/// statements. On the other hand this also makes it less readable, since -/// spotting the places where the decoder loop may be restarted is less -/// obvious. -#define rc_bit_case(prob, action0, action1, seq) \ - case seq: rc_bit(prob, action0, action1, seq) +#define rc_bittree6(probs, final_add) \ +do { \ + symbol = 1; \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + symbol += (uint32_t)(final_add); \ +} while (0) + +#define rc_bittree8(probs, final_add) \ +do { \ + symbol = 1; \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + symbol += (uint32_t)(final_add); \ +} while (0) + + +// Fixed-sized reverse bittree +#define rc_bittree_rev4(probs) \ +do { \ + symbol = 0; \ + rc_bit_last(probs[symbol + 1], , symbol += 1); \ + rc_bit_last(probs[symbol + 2], , symbol += 2); \ + rc_bit_last(probs[symbol + 4], , symbol += 4); \ + rc_bit_last(probs[symbol + 8], , symbol += 8); \ +} while (0) + + +// Decode one bit from variable-sized reverse bittree. The loop is done +// in the code that uses this macro. This could be changed if the assembly +// version benefited from having the loop done in assembly but it didn't +// seem so in early 2024. +// +// Also, if the loop was done here, the loop counter would likely be local +// to the macro so that it wouldn't modify yet another input variable. +// If a _safe version of a macro with a loop was done then a modifiable +// input variable couldn't be avoided though. +#define rc_bit_add_if_1(probs, dest, value_to_add_if_1) \ + rc_bit(probs[symbol], \ + , \ + dest += value_to_add_if_1); + + +// Matched literal +#define decode_with_match_bit \ + t_match_byte <<= 1; \ + t_match_bit = t_match_byte & t_offset; \ + t_subcoder_index = t_offset + t_match_bit + symbol; \ + rc_bit(probs[t_subcoder_index], \ + t_offset &= ~t_match_bit, \ + t_offset &= t_match_bit) + +#define rc_matched_literal(probs_base_var, match_byte) \ +do { \ + uint32_t t_match_byte = (match_byte); \ + uint32_t t_match_bit; \ + uint32_t t_subcoder_index; \ + uint32_t t_offset = 0x100; \ + symbol = 1; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ +} while (0) /// Decode a bit without using a probability. -#define rc_direct(dest, seq) \ +// +// NOTE: GCC 13 and Clang/LLVM 16 can, at least on x86-64, optimize the bound +// calculation to use an arithmetic right shift so there's no need to provide +// the alternative code which, according to C99/C11/C23 6.3.1.3-p3 isn't +// perfectly portable: rc_bound = (uint32_t)((int32_t)rc.code >> 31); +#define rc_direct(dest, count_var) \ do { \ - rc_normalize(seq); \ + dest = (dest << 1) + 1; \ + rc_normalize(); \ + rc.range >>= 1; \ + rc.code -= rc.range; \ + rc_bound = UINT32_C(0) - (rc.code >> 31); \ + dest += rc_bound; \ + rc.code += rc.range & rc_bound; \ +} while (--count_var > 0) + + + +#define rc_direct_safe(dest, count_var, seq) \ +do { \ + rc_normalize_safe(seq); \ rc.range >>= 1; \ rc.code -= rc.range; \ rc_bound = UINT32_C(0) - (rc.code >> 31); \ rc.code += rc.range & rc_bound; \ dest = (dest << 1) + (rc_bound + 1); \ +} while (--count_var > 0) + + +////////////////// +// Branchless C // +////////////////// + +/// Decode a bit using a branchless method. This reduces the number of +/// mispredicted branches and thus can improve speed. +#define rc_c_bit(prob, action_bit, action_neg) \ +do { \ + probability *p = &(prob); \ + rc_normalize(); \ + rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * *p; \ + uint32_t rc_mask = rc.code >= rc_bound; /* rc_mask = decoded bit */ \ + action_bit; /* action when rc_mask is 0 or 1 */ \ + /* rc_mask becomes 0 if bit is 0 and 0xFFFFFFFF if bit is 1: */ \ + rc_mask = 0U - rc_mask; \ + rc.range &= rc_mask; /* If bit 0: set rc.range = 0 */ \ + rc_bound ^= rc_mask; \ + rc_bound -= rc_mask; /* If bit 1: rc_bound = 0U - rc_bound */ \ + rc.range += rc_bound; \ + rc_bound &= rc_mask; \ + rc.code += rc_bound; \ + action_neg; /* action when rc_mask is 0 or 0xFFFFFFFF */ \ + rc_mask = ~rc_mask; /* If bit 0: all bits are set in rc_mask */ \ + rc_mask &= RC_BIT_MODEL_OFFSET; \ + *p -= (*p + rc_mask) >> RC_MOVE_BITS; \ } while (0) -// NOTE: No macros are provided for bittree decoding. It seems to be simpler -// to just write them open in the code. +// Testing on x86-64 give an impression that only the normal bittrees and +// the fixed-sized reverse bittrees are worth the branchless C code. +// It should be tested on other archs for which there isn't assembly code +// in this file. + +// Using addition in "(symbol << 1) + rc_mask" allows use of x86 LEA +// or RISC-V SH1ADD instructions. Compilers might infer it from +// "(symbol << 1) | rc_mask" too if they see that mask is 0 or 1 but +// the use of addition doesn't require such analysis from compilers. +#if LZMA_RANGE_DECODER_CONFIG & 0x01 +#undef rc_bittree_bit +#define rc_bittree_bit(prob) \ + rc_c_bit(prob, \ + symbol = (symbol << 1) + rc_mask, \ + ) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x01 + +#if LZMA_RANGE_DECODER_CONFIG & 0x02 +#undef rc_bittree_rev4 +#define rc_bittree_rev4(probs) \ +do { \ + symbol = 0; \ + rc_c_bit(probs[symbol + 1], symbol += rc_mask, ); \ + rc_c_bit(probs[symbol + 2], symbol += rc_mask << 1, ); \ + rc_c_bit(probs[symbol + 4], symbol += rc_mask << 2, ); \ + rc_c_bit(probs[symbol + 8], symbol += rc_mask << 3, ); \ +} while (0) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x02 + +#if LZMA_RANGE_DECODER_CONFIG & 0x04 +#undef rc_bit_add_if_1 +#define rc_bit_add_if_1(probs, dest, value_to_add_if_1) \ + rc_c_bit(probs[symbol], \ + symbol = (symbol << 1) + rc_mask, \ + dest += (value_to_add_if_1) & rc_mask) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x04 + + +#if LZMA_RANGE_DECODER_CONFIG & 0x08 +#undef decode_with_match_bit +#define decode_with_match_bit \ + t_match_byte <<= 1; \ + t_match_bit = t_match_byte & t_offset; \ + t_subcoder_index = t_offset + t_match_bit + symbol; \ + rc_c_bit(probs[t_subcoder_index], \ + symbol = (symbol << 1) + rc_mask, \ + t_offset &= ~t_match_bit ^ rc_mask) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x08 + + +//////////// +// x86-64 // +//////////// + +#if LZMA_RANGE_DECODER_CONFIG & 0x1F0 + +// rc_asm_y and rc_asm_n are used as arguments to macros to control which +// strings to include or omit. +#define rc_asm_y(str) str +#define rc_asm_n(str) + +// There are a few possible variations for normalization. +// This is the smallest variant which is also used by LZMA SDK. +// +// - This has partial register write (the MOV from (%[in_ptr])). +// +// - INC saves one byte in code size over ADD. False dependency on +// partial flags from INC shouldn't become a problem on any processor +// because the instructions after normalization don't read the flags +// until SUB which sets all flags. +// +#define rc_asm_normalize \ + "cmp %[top_value], %[range]\n\t" \ + "jae 1f\n\t" \ + "shl %[shift_bits], %[code]\n\t" \ + "mov (%[in_ptr]), %b[code]\n\t" \ + "shl %[shift_bits], %[range]\n\t" \ + "inc %[in_ptr]\n" \ + "1:\n" + +// rc_asm_calc(prob) is roughly equivalent to the C version of rc_if_0(prob)... +// +// rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); +// if (rc.code < rc_bound) +// +// ...but the bound is stored in "range": +// +// t0 = range; +// range = (range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); +// t0 -= range; +// t1 = code; +// code -= range; +// +// The carry flag (CF) from the last subtraction holds the negation of +// the decoded bit (if CF==0 then the decoded bit is 1). +// The values in t0 and t1 are needed for rc_update_0(prob) and +// rc_update_1(prob). If the bit is 0, rc_update_0(prob)... +// +// rc.range = rc_bound; +// +// ...has already been done but the "code -= range" has to be reverted using +// the old value stored in t1. (Also, prob needs to be updated.) +// +// If the bit is 1, rc_update_1(prob)... +// +// rc.range -= rc_bound; +// rc.code -= rc_bound; +// +// ...is already done for "code" but the value for "range" needs to be taken +// from t0. (Also, prob needs to be updated here as well.) +// +// The assignments from t0 and t1 can be done in a branchless manner with CMOV +// after the instructions from this macro. The CF from SUB tells which moves +// are needed. +#define rc_asm_calc(prob) \ + "mov %[range], %[t0]\n\t" \ + "shr %[bit_model_total_bits], %[range]\n\t" \ + "imul %[" prob "], %[range]\n\t" \ + "sub %[range], %[t0]\n\t" \ + "mov %[code], %[t1]\n\t" \ + "sub %[range], %[code]\n\t" + +// Also, prob needs to be updated: The update math depends on the decoded bit. +// It can be expressed in a few slightly different ways but this is fairly +// convenient here: +// +// prob -= (prob + (bit ? 0 : RC_BIT_MODEL_OFFSET)) >> RC_MOVE_BITS; +// +// To do it in branchless way when the negation of the decoded bit is in CF, +// both "prob" and "prob + RC_BIT_MODEL_OFFSET" are needed. Then the desired +// value can be picked with CMOV. The addition can be done using LEA without +// affecting CF. +// +// (This prob update method is a tiny bit different from LZMA SDK 23.01. +// In the LZMA SDK a single register is reserved solely for a constant to +// be used with CMOV when updating prob. That is fine since there are enough +// free registers to do so. The method used here uses one fewer register, +// which is valuable with inline assembly.) +// +// * * * +// +// In bittree decoding, each (unrolled) loop iteration decodes one bit +// and needs one prob variable. To make it faster, the prob variable of +// the iteration N+1 is loaded during iteration N. There are two possible +// prob variables to choose from for N+1. Both are loaded from memory and +// the correct one is chosen with CMOV using the same CF as is used for +// other things described above. +// +// This preloading/prefetching requires an extra register. To avoid +// useless moves from "preloaded prob register" to "current prob register", +// the macros swap between the two registers for odd and even iterations. +// +// * * * +// +// Finally, the decoded bit has to be stored in "symbol". Since the negation +// of the bit is in CF, this can be done with SBB: symbol -= CF - 1. That is, +// if the decoded bit is 0 (CF==1) the operation is a no-op "symbol -= 0" +// and when bit is 1 (CF==0) the operation is "symbol -= 0 - 1" which is +// the same as "symbol += 1". +// +// The instructions for all things are intertwined for a few reasons: +// - freeing temporary registers for new use +// - not modifying CF too early +// - instruction scheduling +// +// The first and last iterations can cheat a little. For example, +// on the first iteration "symbol" is known to start from 1 so it +// doesn't need to be read; it can even be immediately initialized +// to 2 to prepare for the second iteration of the loop. +// +// * * * +// +// a = number of the current prob variable (0 or 1) +// b = number of the next prob variable (1 or 0) +// *_only = rc_asm_y or _n to include or exclude code marked with them +#define rc_asm_bittree(a, b, first_only, middle_only, last_only) \ + first_only( \ + "movzw 2(%[probs_base]), %[prob" #a "]\n\t" \ + "mov $2, %[symbol]\n\t" \ + "movzw 4(%[probs_base]), %[prob" #b "]\n\t" \ + ) \ + middle_only( \ + /* Note the scaling of 4 instead of 2: */ \ + "movzw (%[probs_base], %q[symbol], 4), %[prob" #b "]\n\t" \ + ) \ + last_only( \ + "add %[symbol], %[symbol]\n\t" \ + ) \ + \ + rc_asm_normalize \ + rc_asm_calc("prob" #a) \ + \ + "cmovae %[t0], %[range]\n\t" \ + \ + first_only( \ + "movzw 6(%[probs_base]), %[t0]\n\t" \ + "cmovae %[t0], %[prob" #b "]\n\t" \ + ) \ + middle_only( \ + "movzw 2(%[probs_base], %q[symbol], 4), %[t0]\n\t" \ + "lea (%q[symbol], %q[symbol]), %[symbol]\n\t" \ + "cmovae %[t0], %[prob" #b "]\n\t" \ + ) \ + \ + "lea %c[bit_model_offset](%q[prob" #a "]), %[t0]\n\t" \ + "cmovb %[t1], %[code]\n\t" \ + "mov %[symbol], %[t1]\n\t" \ + "cmovae %[prob" #a "], %[t0]\n\t" \ + \ + first_only( \ + "sbb $-1, %[symbol]\n\t" \ + ) \ + middle_only( \ + "sbb $-1, %[symbol]\n\t" \ + ) \ + last_only( \ + "sbb %[last_sbb], %[symbol]\n\t" \ + ) \ + \ + "shr %[move_bits], %[t0]\n\t" \ + "sub %[t0], %[prob" #a "]\n\t" \ + /* Scaling of 1 instead of 2 because symbol <<= 1. */ \ + "mov %w[prob" #a "], (%[probs_base], %q[t1], 1)\n\t" + +// NOTE: The order of variables in __asm__ can affect speed and code size. +#define rc_asm_bittree_n(probs_base_var, final_add, asm_str) \ +do { \ + uint32_t t0; \ + uint32_t t1; \ + uint32_t t_prob0; \ + uint32_t t_prob1; \ + \ + __asm__( \ + asm_str \ + : \ + [range] "+&r"(rc.range), \ + [code] "+&r"(rc.code), \ + [t0] "=&r"(t0), \ + [t1] "=&r"(t1), \ + [prob0] "=&r"(t_prob0), \ + [prob1] "=&r"(t_prob1), \ + [symbol] "=&r"(symbol), \ + [in_ptr] "+&r"(rc_in_ptr) \ + : \ + [probs_base] "r"(probs_base_var), \ + [last_sbb] "n"(-1 - (final_add)), \ + [top_value] "n"(RC_TOP_VALUE), \ + [shift_bits] "n"(RC_SHIFT_BITS), \ + [bit_model_total_bits] "n"(RC_BIT_MODEL_TOTAL_BITS), \ + [bit_model_offset] "n"(RC_BIT_MODEL_OFFSET), \ + [move_bits] "n"(RC_MOVE_BITS) \ + : \ + "cc", "memory"); \ +} while (0) + + +#if LZMA_RANGE_DECODER_CONFIG & 0x010 +#undef rc_bittree3 +#define rc_bittree3(probs_base_var, final_add) \ + rc_asm_bittree_n(probs_base_var, final_add, \ + rc_asm_bittree(0, 1, rc_asm_y, rc_asm_n, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_n, rc_asm_y) \ + ) + +#undef rc_bittree6 +#define rc_bittree6(probs_base_var, final_add) \ + rc_asm_bittree_n(probs_base_var, final_add, \ + rc_asm_bittree(0, 1, rc_asm_y, rc_asm_n, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_n, rc_asm_y) \ + ) + +#undef rc_bittree8 +#define rc_bittree8(probs_base_var, final_add) \ + rc_asm_bittree_n(probs_base_var, final_add, \ + rc_asm_bittree(0, 1, rc_asm_y, rc_asm_n, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_n, rc_asm_y) \ + ) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x010 + + +// Fixed-sized reverse bittree +// +// This uses the indexing that constructs the final value in symbol directly. +// add = 1, 2, 4, 8 +// dcur = -, 4, 8, 16 +// dnext0 = 4, 8, 16, - +// dnext0 = 6, 12, 24, - +#define rc_asm_bittree_rev(a, b, add, dcur, dnext0, dnext1, \ + first_only, middle_only, last_only) \ + first_only( \ + "movzw 2(%[probs_base]), %[prob" #a "]\n\t" \ + "xor %[symbol], %[symbol]\n\t" \ + "movzw 4(%[probs_base]), %[prob" #b "]\n\t" \ + ) \ + middle_only( \ + "movzw " #dnext0 "(%[probs_base], %q[symbol], 2), " \ + "%[prob" #b "]\n\t" \ + ) \ + \ + rc_asm_normalize \ + rc_asm_calc("prob" #a) \ + \ + "cmovae %[t0], %[range]\n\t" \ + \ + first_only( \ + "movzw 6(%[probs_base]), %[t0]\n\t" \ + "cmovae %[t0], %[prob" #b "]\n\t" \ + ) \ + middle_only( \ + "movzw " #dnext1 "(%[probs_base], %q[symbol], 2), %[t0]\n\t" \ + "cmovae %[t0], %[prob" #b "]\n\t" \ + ) \ + \ + "lea " #add "(%q[symbol]), %[t0]\n\t" \ + "cmovb %[t1], %[code]\n\t" \ + middle_only( \ + "mov %[symbol], %[t1]\n\t" \ + ) \ + last_only( \ + "mov %[symbol], %[t1]\n\t" \ + ) \ + "cmovae %[t0], %[symbol]\n\t" \ + "lea %c[bit_model_offset](%q[prob" #a "]), %[t0]\n\t" \ + "cmovae %[prob" #a "], %[t0]\n\t" \ + \ + "shr %[move_bits], %[t0]\n\t" \ + "sub %[t0], %[prob" #a "]\n\t" \ + first_only( \ + "mov %w[prob" #a "], 2(%[probs_base])\n\t" \ + ) \ + middle_only( \ + "mov %w[prob" #a "], " \ + #dcur "(%[probs_base], %q[t1], 2)\n\t" \ + ) \ + last_only( \ + "mov %w[prob" #a "], " \ + #dcur "(%[probs_base], %q[t1], 2)\n\t" \ + ) + +#if LZMA_RANGE_DECODER_CONFIG & 0x020 +#undef rc_bittree_rev4 +#define rc_bittree_rev4(probs_base_var) \ +rc_asm_bittree_n(probs_base_var, 4, \ + rc_asm_bittree_rev(0, 1, 1, -, 4, 6, rc_asm_y, rc_asm_n, rc_asm_n) \ + rc_asm_bittree_rev(1, 0, 2, 4, 8, 12, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree_rev(0, 1, 4, 8, 16, 24, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree_rev(1, 0, 8, 16, -, -, rc_asm_n, rc_asm_n, rc_asm_y) \ +) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x020 + + +#if LZMA_RANGE_DECODER_CONFIG & 0x040 +#undef rc_bit_add_if_1 +#define rc_bit_add_if_1(probs_base_var, dest_var, value_to_add_if_1) \ +do { \ + uint32_t t0; \ + uint32_t t1; \ + uint32_t t2 = (value_to_add_if_1); \ + uint32_t t_prob; \ + uint32_t t_index; \ + \ + __asm__( \ + "movzw (%[probs_base], %q[symbol], 2), %[prob]\n\t" \ + "mov %[symbol], %[index]\n\t" \ + \ + "add %[dest], %[t2]\n\t" \ + "add %[symbol], %[symbol]\n\t" \ + \ + rc_asm_normalize \ + rc_asm_calc("prob") \ + \ + "cmovae %[t0], %[range]\n\t" \ + "lea %c[bit_model_offset](%q[prob]), %[t0]\n\t" \ + "cmovb %[t1], %[code]\n\t" \ + "cmovae %[prob], %[t0]\n\t" \ + \ + "cmovae %[t2], %[dest]\n\t" \ + "sbb $-1, %[symbol]\n\t" \ + \ + "sar %[move_bits], %[t0]\n\t" \ + "sub %[t0], %[prob]\n\t" \ + "mov %w[prob], (%[probs_base], %q[index], 2)" \ + : \ + [range] "+&r"(rc.range), \ + [code] "+&r"(rc.code), \ + [t0] "=&r"(t0), \ + [t1] "=&r"(t1), \ + [prob] "=&r"(t_prob), \ + [index] "=&r"(t_index), \ + [symbol] "+&r"(symbol), \ + [t2] "+&r"(t2), \ + [dest] "+&r"(dest_var), \ + [in_ptr] "+&r"(rc_in_ptr) \ + : \ + [probs_base] "r"(probs_base_var), \ + [top_value] "n"(RC_TOP_VALUE), \ + [shift_bits] "n"(RC_SHIFT_BITS), \ + [bit_model_total_bits] "n"(RC_BIT_MODEL_TOTAL_BITS), \ + [bit_model_offset] "n"(RC_BIT_MODEL_OFFSET), \ + [move_bits] "n"(RC_MOVE_BITS) \ + : \ + "cc", "memory"); \ +} while (0) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x040 + + +// Literal decoding uses a normal 8-bit bittree but literal with match byte +// is more complex in picking the probability variable from the correct +// subtree. This doesn't use preloading/prefetching of the next prob because +// there are four choices instead of two. +// +// FIXME? The first iteration starts with symbol = 1 so it could be optimized +// by a tiny amount. +#define rc_asm_matched_literal(nonlast_only) \ + "add %[offset], %[symbol]\n\t" \ + "and %[offset], %[match_bit]\n\t" \ + "add %[match_bit], %[symbol]\n\t" \ + \ + "movzw (%[probs_base], %q[symbol], 2), %[prob]\n\t" \ + \ + "add %[symbol], %[symbol]\n\t" \ + \ + nonlast_only( \ + "xor %[match_bit], %[offset]\n\t" \ + "add %[match_byte], %[match_byte]\n\t" \ + ) \ + \ + rc_asm_normalize \ + rc_asm_calc("prob") \ + \ + "cmovae %[t0], %[range]\n\t" \ + "lea %c[bit_model_offset](%q[prob]), %[t0]\n\t" \ + "cmovb %[t1], %[code]\n\t" \ + "mov %[symbol], %[t1]\n\t" \ + "cmovae %[prob], %[t0]\n\t" \ + \ + nonlast_only( \ + "cmovae %[match_bit], %[offset]\n\t" \ + "mov %[match_byte], %[match_bit]\n\t" \ + ) \ + \ + "sbb $-1, %[symbol]\n\t" \ + \ + "shr %[move_bits], %[t0]\n\t" \ + /* Undo symbol += match_bit + offset: */ \ + "and $0x1FF, %[symbol]\n\t" \ + "sub %[t0], %[prob]\n\t" \ + \ + /* Scaling of 1 instead of 2 because symbol <<= 1. */ \ + "mov %w[prob], (%[probs_base], %q[t1], 1)\n\t" + + +#if LZMA_RANGE_DECODER_CONFIG & 0x080 +#undef rc_matched_literal +#define rc_matched_literal(probs_base_var, match_byte_value) \ +do { \ + uint32_t t0; \ + uint32_t t1; \ + uint32_t t_prob; \ + uint32_t t_match_byte = (uint32_t)(match_byte_value) << 1; \ + uint32_t t_match_bit = t_match_byte; \ + uint32_t t_offset = 0x100; \ + symbol = 1; \ + \ + __asm__( \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_n) \ + : \ + [range] "+&r"(rc.range), \ + [code] "+&r"(rc.code), \ + [t0] "=&r"(t0), \ + [t1] "=&r"(t1), \ + [prob] "=&r"(t_prob), \ + [match_bit] "+&r"(t_match_bit), \ + [symbol] "+&r"(symbol), \ + [match_byte] "+&r"(t_match_byte), \ + [offset] "+&r"(t_offset), \ + [in_ptr] "+&r"(rc_in_ptr) \ + : \ + [probs_base] "r"(probs_base_var), \ + [top_value] "n"(RC_TOP_VALUE), \ + [shift_bits] "n"(RC_SHIFT_BITS), \ + [bit_model_total_bits] "n"(RC_BIT_MODEL_TOTAL_BITS), \ + [bit_model_offset] "n"(RC_BIT_MODEL_OFFSET), \ + [move_bits] "n"(RC_MOVE_BITS) \ + : \ + "cc", "memory"); \ +} while (0) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x080 + + +// Doing the loop in asm instead of C seems to help a little. +#if LZMA_RANGE_DECODER_CONFIG & 0x100 +#undef rc_direct +#define rc_direct(dest_var, count_var) \ +do { \ + uint32_t t0; \ + uint32_t t1; \ + \ + __asm__( \ + "2:\n\t" \ + "add %[dest], %[dest]\n\t" \ + "lea 1(%q[dest]), %[t1]\n\t" \ + \ + rc_asm_normalize \ + \ + "shr $1, %[range]\n\t" \ + "mov %[code], %[t0]\n\t" \ + "sub %[range], %[code]\n\t" \ + "cmovns %[t1], %[dest]\n\t" \ + "cmovs %[t0], %[code]\n\t" \ + "dec %[count]\n\t" \ + "jnz 2b\n\t" \ + : \ + [range] "+&r"(rc.range), \ + [code] "+&r"(rc.code), \ + [t0] "=&r"(t0), \ + [t1] "=&r"(t1), \ + [dest] "+&r"(dest_var), \ + [count] "+&r"(count_var), \ + [in_ptr] "+&r"(rc_in_ptr) \ + : \ + [top_value] "n"(RC_TOP_VALUE), \ + [shift_bits] "n"(RC_SHIFT_BITS) \ + : \ + "cc", "memory"); \ +} while (0) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x100 + +#endif // x86_64 #endif diff --git a/contrib/libs/lzma/liblzma/rangecoder/range_encoder.h b/contrib/libs/lzma/liblzma/rangecoder/range_encoder.h index d794eabbcc..8f62a47ac0 100644 --- a/contrib/libs/lzma/liblzma/rangecoder/range_encoder.h +++ b/contrib/libs/lzma/liblzma/rangecoder/range_encoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file range_encoder.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_RANGE_ENCODER_H diff --git a/contrib/libs/lzma/liblzma/simple/arm.c b/contrib/libs/lzma/liblzma/simple/arm.c index 6e53970d2f..58acb2d11a 100644 --- a/contrib/libs/lzma/liblzma/simple/arm.c +++ b/contrib/libs/lzma/liblzma/simple/arm.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file arm.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "simple_private.h" diff --git a/contrib/libs/lzma/liblzma/simple/arm64.c b/contrib/libs/lzma/liblzma/simple/arm64.c index 0fe0824eb9..0a73f6c8bf 100644 --- a/contrib/libs/lzma/liblzma/simple/arm64.c +++ b/contrib/libs/lzma/liblzma/simple/arm64.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file arm64.c @@ -16,9 +18,6 @@ // Jia Tan // Igor Pavlov // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "simple_private.h" diff --git a/contrib/libs/lzma/liblzma/simple/armthumb.c b/contrib/libs/lzma/liblzma/simple/armthumb.c index 25d8dbd4f3..f1eeca9b80 100644 --- a/contrib/libs/lzma/liblzma/simple/armthumb.c +++ b/contrib/libs/lzma/liblzma/simple/armthumb.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file armthumb.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "simple_private.h" diff --git a/contrib/libs/lzma/liblzma/simple/ia64.c b/contrib/libs/lzma/liblzma/simple/ia64.c index 692b0a295e..5025014099 100644 --- a/contrib/libs/lzma/liblzma/simple/ia64.c +++ b/contrib/libs/lzma/liblzma/simple/ia64.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file ia64.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "simple_private.h" diff --git a/contrib/libs/lzma/liblzma/simple/powerpc.c b/contrib/libs/lzma/liblzma/simple/powerpc.c index 3a340fd171..ba6cfbef3a 100644 --- a/contrib/libs/lzma/liblzma/simple/powerpc.c +++ b/contrib/libs/lzma/liblzma/simple/powerpc.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file powerpc.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "simple_private.h" diff --git a/contrib/libs/lzma/liblzma/simple/riscv.c b/contrib/libs/lzma/liblzma/simple/riscv.c new file mode 100644 index 0000000000..b18df8b637 --- /dev/null +++ b/contrib/libs/lzma/liblzma/simple/riscv.c @@ -0,0 +1,755 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file riscv.c +/// \brief Filter for 32-bit/64-bit little/big endian RISC-V binaries +/// +/// This converts program counter relative addresses in function calls +/// (JAL, AUIPC+JALR), address calculation of functions and global +/// variables (AUIPC+ADDI), loads (AUIPC+load), and stores (AUIPC+store). +/// +/// For AUIPC+inst2 pairs, the paired instruction checking is fairly relaxed. +/// The paired instruction opcode must only have its lowest two bits set, +/// meaning it will convert any paired instruction that is not a 16-bit +/// compressed instruction. This was shown to be enough to keep the number +/// of false matches low while improving code size and speed. +// +// Authors: Lasse Collin +// Jia Tan +// +// Special thanks: +// +// - Chien Wong <m@xv97.com> provided a few early versions of RISC-V +// filter variants along with test files and benchmark results. +// +// - Igor Pavlov helped a lot in the filter design, getting it both +// faster and smaller. The implementation here is still independently +// written, not based on LZMA SDK. +// +/////////////////////////////////////////////////////////////////////////////// + +/* + +RISC-V filtering +================ + + RV32I and RV64I, possibly combined with extensions C, Zfh, F, D, + and Q, are identical enough that the same filter works for both. + + The instruction encoding is always little endian, even on systems + with big endian data access. Thus the same filter works for both + endiannesses. + + The following instructions have program counter relative + (pc-relative) behavior: + +JAL +--- + + JAL is used for function calls (including tail calls) and + unconditional jumps within functions. Jumps within functions + aren't useful to filter because the absolute addresses often + appear only once or at most a few times. Tail calls and jumps + within functions look the same to a simple filter so neither + are filtered, that is, JAL x0 is ignored (the ABI name of the + register x0 is "zero"). + + Almost all calls store the return address to register x1 (ra) + or x5 (t0). To reduce false matches when the filter is applied + to non-code data, only the JAL instructions that use x1 or x5 + are converted. JAL has pc-relative range of +/-1 MiB so longer + calls and jumps need another method (AUIPC+JALR). + +C.J and C.JAL +------------- + + C.J and C.JAL have pc-relative range of +/-2 KiB. + + C.J is for tail calls and jumps within functions and isn't + filtered for the reasons mentioned for JAL x0. + + C.JAL is an RV32C-only instruction. Its encoding overlaps with + RV64C-only C.ADDIW which is a common instruction. So if filtering + C.JAL was useful (it wasn't tested) then a separate filter would + be needed for RV32 and RV64. Also, false positives would be a + significant problem when the filter is applied to non-code data + because C.JAL needs only five bits to match. Thus, this filter + doesn't modify C.JAL instructions. + +BEQ, BNE, BLT, BGE, BLTU, BGEU, C.BEQZ, and C.BNEZ +-------------------------------------------------- + + These are conditional branches with pc-relative range + of +/-4 KiB (+/-256 B for C.*). The absolute addresses often + appear only once and very short distances are the most common, + so filtering these instructions would make compression worse. + +AUIPC with rd != x0 +------------------- + + AUIPC is paired with a second instruction (inst2) to do + pc-relative jumps, calls, loads, stores, and for taking + an address of a symbol. AUIPC has a 20-bit immediate and + the possible inst2 choices have a 12-bit immediate. + + AUIPC stores pc + 20-bit signed immediate to a register. + The immediate encodes a multiple of 4 KiB so AUIPC itself + has a pc-relative range of +/-2 GiB. AUIPC does *NOT* set + the lowest 12 bits of the result to zero! This means that + the 12-bit immediate in inst2 cannot just include the lowest + 12 bits of the absolute address as is; the immediate has to + compensate for the lowest 12 bits that AUIPC copies from the + program counter. This means that a good filter has to convert + not only AUIPC but also the paired inst2. + + A strict filter would focus on filtering the following + AUIPC+inst2 pairs: + + - AUIPC+JALR: Function calls, including tail calls. + + - AUIPC+ADDI: Calculating the address of a function + or a global variable. + + - AUIPC+load/store from the base instruction sets + (RV32I, RV64I) or from the floating point extensions + Zfh, F, D, and Q: + * RV32I: LB, LH, LW, LBU, LHU, SB, SH, SW + * RV64I has also: LD, LWU, SD + * Zfh: FLH, FSH + * F: FLW, FSW + * D: FLD, FSD + * Q: FLQ, FSQ + + NOTE: AUIPC+inst2 can only be a pair if AUIPC's rd specifies + the same register as inst2's rs1. + + Instead of strictly accepting only the above instructions as inst2, + this filter uses a much simpler condition: the lowest two bits of + inst2 must be set, that is, inst2 must not be a 16-bit compressed + instruction. So this will accept all 32-bit and possible future + extended instructions as a pair to AUIPC if the bits in AUIPC's + rd [11:7] match the bits [19:15] in inst2 (the bits that I-type and + S-type instructions use for rs1). Testing showed that this relaxed + condition for inst2 did not consistently or significantly affect + compression ratio but it reduced code size and improved speed. + + Additionally, the paired instruction is always treated as an I-type + instruction. The S-type instructions used by stores (SB, SH, SW, + etc.) place the lowest 5 bits of the immediate in a different + location than I-type instructions. AUIPC+store pairs are less + common than other pairs, and testing showed that the extra + code required to handle S-type instructions was not worth the + compression ratio gained. + + AUIPC+inst2 don't necessarily appear sequentially next to each + other although very often they do. Especially AUIPC+JALR are + sequential as that may allow instruction fusion in processors + (and perhaps help branch prediction as a fused AUIPC+JALR is + a direct branch while JALR alone is an indirect branch). + + Clang 16 can generate code where AUIPC+inst2 is split: + + - AUIPC is outside a loop and inst2 (load/store) is inside + the loop. This way the AUIPC instruction needs to be + executed only once. + + - Load-modify-store may have AUIPC for the load and the same + AUIPC-result is used for the store too. This may get combined + with AUIPC being outside the loop. + + - AUIPC is before a conditional branch and inst2 is hundreds + of bytes away at the branch target. + + - Inner and outer pair: + + auipc a1,0x2f + auipc a2,0x3d + ld a2,-500(a2) + addi a1,a1,-233 + + - Many split pairs with an untaken conditional branch between: + + auipc s9,0x1613 # Pair 1 + auipc s4,0x1613 # Pair 2 + auipc s6,0x1613 # Pair 3 + auipc s10,0x1613 # Pair 4 + beqz a5,a3baae + ld a0,0(a6) + ld a6,246(s9) # Pair 1 + ld a1,250(s4) # Pair 2 + ld a3,254(s6) # Pair 3 + ld a4,258(s10) # Pair 4 + + It's not possible to find all split pairs in a filter like this. + At least in 2024, simple sequential pairs are 99 % of AUIPC uses + so filtering only such pairs gives good results and makes the + filter simpler. However, it's possible that future compilers will + produce different code where sequential pairs aren't as common. + + This filter doesn't convert AUIPC instructions alone because: + + (1) The conversion would be off-by-one (or off-by-4096) half the + time because the lowest 12 bits from inst2 (inst2_imm12) + aren't known. We only know that the absolute address is + pc + AUIPC_imm20 + [-2048, +2047] but there is no way to + know the exact 4096-byte multiple (or 4096 * n + 2048): + there are always two possibilities because AUIPC copies + the 12 lowest bits from pc instead of zeroing them. + + NOTE: The sign-extension of inst2_imm12 adds a tiny bit + of extra complexity to AUIPC math in general but it's not + the reason for this problem. The sign-extension only changes + the relative position of the pc-relative 4096-byte window. + + (2) Matching AUIPC instruction alone requires only seven bits. + When the filter is applied to non-code data, that leads + to many false positives which make compression worse. + As long as most AUIPC+inst2 pairs appear as two consecutive + instructions, converting only such pairs gives better results. + + In assembly, AUIPC+inst2 tend to look like this: + + # Call: + auipc ra, 0x12345 + jalr ra, -42(ra) + + # Tail call: + auipc t1, 0x12345 + jalr zero, -42(t1) + + # Getting the absolute address: + auipc a0, 0x12345 + addi a0, a0, -42 + + # rd of inst2 isn't necessarily the same as rs1 even + # in cases where there is no reason to preserve rs1. + auipc a0, 0x12345 + addi a1, a0, -42 + + As of 2024, 16-bit instructions from the C extension don't + appear as inst2. The RISC-V psABI doesn't list AUIPC+C.* as + a linker relaxation type explicitly but it's not disallowed + either. Usefulness is limited as most of the time the lowest + 12 bits won't fit in a C instruction. This filter doesn't + support AUIPC+C.* combinations because this makes the filter + simpler, there are no test files, and it hopefully will never + be needed anyway. + + (Compare AUIPC to ARM64 where ADRP does set the lowest 12 bits + to zero. The paired instruction has the lowest 12 bits of the + absolute address as is in a zero-extended immediate. Thus the + ARM64 filter doesn't need to care about the instructions that + are paired with ADRP. An off-by-4096 issue can still occur if + the code section isn't aligned with the filter's start offset. + It's not a problem with standalone ELF files but Windows PE + files need start_offset=3072 for best results. Also, a .tar + stores files with 512-byte alignment so most of the time it + won't be the best for ARM64.) + +AUIPC with rd == x0 +------------------- + + AUIPC instructions with rd=x0 are reserved for HINTs in the base + instruction set. Such AUIPC instructions are never filtered. + + As of January 2024, it seems likely that AUIPC with rd=x0 will + be used for landing pads (pseudoinstruction LPAD). LPAD is used + to mark valid targets for indirect jumps (for JALR), for example, + beginnings of functions. The 20-bit immediate in LPAD instruction + is a label, not a pc-relative address. Thus it would be + counterproductive to convert AUIPC instructions with rd=x0. + + Often the next instruction after LPAD won't have rs1=x0 and thus + the filtering would be skipped for that reason alone. However, + it's not good to rely on this. For example, consider a function + that begins like this: + + int foo(int i) + { + if (i <= 234) { + ... + } + + A compiler may generate something like this: + + lpad 0x54321 + li a5, 234 + bgt a0, a5, .L2 + + Converting the pseudoinstructions to raw instructions: + + auipc x0, 0x54321 + addi x15, x0, 234 + blt x15, x10, .L2 + + In this case the filter would undesirably convert the AUIPC+ADDI + pair if the filter didn't explicitly skip AUIPC instructions + that have rd=x0. + +*/ + + +#include "simple_private.h" + + +// This checks two conditions at once: +// - AUIPC rd == inst2 rs1. +// - inst2 opcode has the lowest two bits set. +// +// The 8 bit left shift aligns the rd of AUIPC with the rs1 of inst2. +// By XORing the registers, any non-zero value in those bits indicates the +// registers are not equal and thus not an AUIPC pair. Subtracting 3 from +// inst2 will zero out the first two opcode bits only when they are set. +// The mask tests if any of the register or opcode bits are set (and thus +// not an AUIPC pair). +// +// Alternative expression: (((((auipc) << 8) ^ (inst2)) & 0xF8003) != 3) +#define NOT_AUIPC_PAIR(auipc, inst2) \ + ((((auipc) << 8) ^ ((inst2) - 3)) & 0xF8003) + +// This macro checks multiple conditions: +// (1) AUIPC rd [11:7] == x2 (special rd value). +// (2) AUIPC bits 12 and 13 set (the lowest two opcode bits of packed inst2). +// (3) inst2_rs1 doesn't equal x0 or x2 because the opposite +// conversion is only done when +// auipc_rd != x0 && +// auipc_rd != x2 && +// auipc_rd == inst2_rs1. +// +// The left-hand side takes care of (1) and (2). +// (a) The lowest 7 bits are already known to be AUIPC so subtracting 0x17 +// makes those bits zeros. +// (b) If AUIPC rd equals x2, subtracting 0x100 makes bits [11:7] zeros. +// If rd doesn't equal x2, then there will be at least one non-zero bit +// and the next step (c) is irrelevant. +// (c) If the lowest two opcode bits of the packed inst2 are set in [13:12], +// then subtracting 0x3000 will make those bits zeros. Otherwise there +// will be at least one non-zero bit. +// +// The shift by 18 removes the high bits from the final '>=' comparison and +// ensures that any non-zero result will be larger than any possible result +// from the right-hand side of the comparison. The cast ensures that the +// left-hand side didn't get promoted to a larger type than uint32_t. +// +// On the right-hand side, inst2_rs1 & 0x1D will be non-zero as long as +// inst2_rs1 is not x0 or x2. +// +// The final '>=' comparison will make the expression true if: +// - The subtraction caused any bits to be set (special AUIPC rd value not +// used or inst2 opcode bits not set). (non-zero >= non-zero or 0) +// - The subtraction did not cause any bits to be set but inst2_rs1 was +// x0 or x2. (0 >= 0) +#define NOT_SPECIAL_AUIPC(auipc, inst2_rs1) \ + ((uint32_t)(((auipc) - 0x3117) << 18) >= ((inst2_rs1) & 0x1D)) + + +// The encode and decode functions are split for this filter because of the +// AUIPC+inst2 filtering. This filter design allows a decoder-only +// implementation to be smaller than alternative designs. + +#ifdef HAVE_ENCODER_RISCV +static size_t +riscv_encode(void *simple lzma_attribute((__unused__)), + uint32_t now_pos, + bool is_encoder lzma_attribute((__unused__)), + uint8_t *buffer, size_t size) +{ + // Avoid using i + 8 <= size in the loop condition. + // + // NOTE: If there is a JAL in the last six bytes of the stream, it + // won't be converted. This is intentional to keep the code simpler. + if (size < 8) + return 0; + + size -= 8; + + size_t i; + + // The loop is advanced by 2 bytes every iteration since the + // instruction stream may include 16-bit instructions (C extension). + for (i = 0; i <= size; i += 2) { + uint32_t inst = buffer[i]; + + if (inst == 0xEF) { + // JAL + const uint32_t b1 = buffer[i + 1]; + + // Only filter rd=x1(ra) and rd=x5(t0). + if ((b1 & 0x0D) != 0) + continue; + + // The 20-bit immediate is in four pieces. + // The encoder stores it in big endian form + // since it improves compression slightly. + const uint32_t b2 = buffer[i + 2]; + const uint32_t b3 = buffer[i + 3]; + const uint32_t pc = now_pos + (uint32_t)i; + +// The following chart shows the highest three bytes of JAL, focusing on +// the 20-bit immediate field [31:12]. The first row of numbers is the +// bit position in a 32-bit little endian instruction. The second row of +// numbers shows the order of the immediate field in a J-type instruction. +// The last row is the bit number in each byte. +// +// To determine the amount to shift each bit, subtract the value in +// the last row from the value in the second last row. If the number +// is positive, shift left. If negative, shift right. +// +// For example, at the rightmost side of the chart, the bit 4 in b1 is +// the bit 12 of the address. Thus that bit needs to be shifted left +// by 12 - 4 = 8 bits to put it in the right place in the addr variable. +// +// NOTE: The immediate of a J-type instruction holds bits [20:1] of +// the address. The bit [0] is always 0 and not part of the immediate. +// +// | b3 | b2 | b1 | +// | 31 30 29 28 27 26 25 24 | 23 22 21 20 19 18 17 16 | 15 14 13 12 x x x x | +// | 20 10 9 8 7 6 5 4 | 3 2 1 11 19 18 17 16 | 15 14 13 12 x x x x | +// | 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 | 7 6 5 4 x x x x | + + uint32_t addr = ((b1 & 0xF0) << 8) + | ((b2 & 0x0F) << 16) + | ((b2 & 0x10) << 7) + | ((b2 & 0xE0) >> 4) + | ((b3 & 0x7F) << 4) + | ((b3 & 0x80) << 13); + + addr += pc; + + buffer[i + 1] = (uint8_t)((b1 & 0x0F) + | ((addr >> 13) & 0xF0)); + + buffer[i + 2] = (uint8_t)(addr >> 9); + buffer[i + 3] = (uint8_t)(addr >> 1); + + // The "-2" is included because the for-loop will + // always increment by 2. In this case, we want to + // skip an extra 2 bytes since we used 4 bytes + // of input. + i += 4 - 2; + + } else if ((inst & 0x7F) == 0x17) { + // AUIPC + inst |= (uint32_t)buffer[i + 1] << 8; + inst |= (uint32_t)buffer[i + 2] << 16; + inst |= (uint32_t)buffer[i + 3] << 24; + + // Branch based on AUIPC's rd. The bitmask test does + // the same thing as this: + // + // const uint32_t auipc_rd = (inst >> 7) & 0x1F; + // if (auipc_rd != 0 && auipc_rd != 2) { + if (inst & 0xE80) { + // AUIPC's rd doesn't equal x0 or x2. + + // Check if AUIPC+inst2 are a pair. + uint32_t inst2 = read32le(buffer + i + 4); + + if (NOT_AUIPC_PAIR(inst, inst2)) { + // The NOT_AUIPC_PAIR macro allows + // a false AUIPC+AUIPC pair if the + // bits [19:15] (where rs1 would be) + // in the second AUIPC match the rd + // of the first AUIPC. + // + // We must skip enough forward so + // that the first two bytes of the + // second AUIPC cannot get converted. + // Such a conversion could make the + // current pair become a valid pair + // which would desync the decoder. + // + // Skipping six bytes is enough even + // though the above condition looks + // at the lowest four bits of the + // buffer[i + 6] too. This is safe + // because this filter never changes + // those bits if a conversion at + // that position is done. + i += 6 - 2; + continue; + } + + // Convert AUIPC+inst2 to a special format: + // + // - The lowest 7 bits [6:0] retain the + // AUIPC opcode. + // + // - The rd [11:7] is set to x2(sp). x2 is + // used as the stack pointer so AUIPC with + // rd=x2 should be very rare in real-world + // executables. + // + // - The remaining 20 bits [31:12] (that + // normally hold the pc-relative immediate) + // are used to store the lowest 20 bits of + // inst2. That is, the 12-bit immediate of + // inst2 is not included. + // + // - The location of the original inst2 is + // used to store the 32-bit absolute + // address in big endian format. Compared + // to the 20+12-bit split encoding, this + // results in a longer uninterrupted + // sequence of identical common bytes + // when the same address is referred + // with different instruction pairs + // (like AUIPC+LD vs. AUIPC+ADDI) or + // when the occurrences of the same + // pair use different registers. When + // referring to adjacent memory locations + // (like function calls that go via the + // ELF PLT), in big endian order only the + // last 1-2 bytes differ; in little endian + // the differing 1-2 bytes would be in the + // middle of the 8-byte sequence. + // + // When reversing the transformation, the + // original rd of AUIPC can be restored + // from inst2's rs1 as they are required to + // be the same. + + // Arithmetic right shift makes sign extension + // trivial but (1) it's implementation-defined + // behavior (C99/C11/C23 6.5.7-p5) and so is + // (2) casting unsigned to signed (6.3.1.3-p3). + // + // One can check for (1) with + // + // if ((-1 >> 1) == -1) ... + // + // but (2) has to be checked from the + // compiler docs. GCC promises that (1) + // and (2) behave in the common expected + // way and thus + // + // addr += (uint32_t)( + // (int32_t)inst2 >> 20); + // + // does the same as the code below. But since + // the 100 % portable way is only a few bytes + // bigger code and there is no real speed + // difference, let's just use that, especially + // since the decoder doesn't need this at all. + uint32_t addr = inst & 0xFFFFF000; + addr += (inst2 >> 20) + - ((inst2 >> 19) & 0x1000); + + addr += now_pos + (uint32_t)i; + + // Construct the first 32 bits: + // [6:0] AUIPC opcode + // [11:7] Special AUIPC rd = x2 + // [31:12] The lowest 20 bits of inst2 + inst = 0x17 | (2 << 7) | (inst2 << 12); + + write32le(buffer + i, inst); + + // The second 32 bits store the absolute + // address in big endian order. + write32be(buffer + i + 4, addr); + } else { + // AUIPC's rd equals x0 or x2. + // + // x0 indicates a landing pad (LPAD). + // It's always skipped. + // + // AUIPC with rd == x2 is used for the special + // format as explained above. When the input + // contains a byte sequence that matches the + // special format, "fake" decoding must be + // done to keep the filter bijective (that + // is, safe to apply on arbitrary data). + // + // See the "x0 or x2" section in riscv_decode() + // for how the "real" decoding is done. The + // "fake" decoding is a simplified version + // of "real" decoding with the following + // differences (these reduce code size of + // the decoder): + // (1) The lowest 12 bits aren't sign-extended. + // (2) No address conversion is done. + // (3) Big endian format isn't used (the fake + // address is in little endian order). + + // Check if inst matches the special format. + const uint32_t fake_rs1 = inst >> 27; + + if (NOT_SPECIAL_AUIPC(inst, fake_rs1)) { + i += 4 - 2; + continue; + } + + const uint32_t fake_addr = + read32le(buffer + i + 4); + + // Construct the second 32 bits: + // [19:0] Upper 20 bits from AUIPC + // [31:20] The lowest 12 bits of fake_addr + const uint32_t fake_inst2 = (inst >> 12) + | (fake_addr << 20); + + // Construct new first 32 bits from: + // [6:0] AUIPC opcode + // [11:7] Fake AUIPC rd = fake_rs1 + // [31:12] The highest 20 bits of fake_addr + inst = 0x17 | (fake_rs1 << 7) + | (fake_addr & 0xFFFFF000); + + write32le(buffer + i, inst); + write32le(buffer + i + 4, fake_inst2); + } + + i += 8 - 2; + } + } + + return i; +} + + +extern lzma_ret +lzma_simple_riscv_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return lzma_simple_coder_init(next, allocator, filters, + &riscv_encode, 0, 8, 2, true); +} +#endif + + +#ifdef HAVE_DECODER_RISCV +static size_t +riscv_decode(void *simple lzma_attribute((__unused__)), + uint32_t now_pos, + bool is_encoder lzma_attribute((__unused__)), + uint8_t *buffer, size_t size) +{ + if (size < 8) + return 0; + + size -= 8; + + size_t i; + for (i = 0; i <= size; i += 2) { + uint32_t inst = buffer[i]; + + if (inst == 0xEF) { + // JAL + const uint32_t b1 = buffer[i + 1]; + + // Only filter rd=x1(ra) and rd=x5(t0). + if ((b1 & 0x0D) != 0) + continue; + + const uint32_t b2 = buffer[i + 2]; + const uint32_t b3 = buffer[i + 3]; + const uint32_t pc = now_pos + (uint32_t)i; + +// | b3 | b2 | b1 | +// | 31 30 29 28 27 26 25 24 | 23 22 21 20 19 18 17 16 | 15 14 13 12 x x x x | +// | 20 10 9 8 7 6 5 4 | 3 2 1 11 19 18 17 16 | 15 14 13 12 x x x x | +// | 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 | 7 6 5 4 x x x x | + + uint32_t addr = ((b1 & 0xF0) << 13) + | (b2 << 9) | (b3 << 1); + + addr -= pc; + + buffer[i + 1] = (uint8_t)((b1 & 0x0F) + | ((addr >> 8) & 0xF0)); + + buffer[i + 2] = (uint8_t)(((addr >> 16) & 0x0F) + | ((addr >> 7) & 0x10) + | ((addr << 4) & 0xE0)); + + buffer[i + 3] = (uint8_t)(((addr >> 4) & 0x7F) + | ((addr >> 13) & 0x80)); + + i += 4 - 2; + + } else if ((inst & 0x7F) == 0x17) { + // AUIPC + uint32_t inst2; + + inst |= (uint32_t)buffer[i + 1] << 8; + inst |= (uint32_t)buffer[i + 2] << 16; + inst |= (uint32_t)buffer[i + 3] << 24; + + if (inst & 0xE80) { + // AUIPC's rd doesn't equal x0 or x2. + + // Check if it is a "fake" AUIPC+inst2 pair. + inst2 = read32le(buffer + i + 4); + + if (NOT_AUIPC_PAIR(inst, inst2)) { + i += 6 - 2; + continue; + } + + // Decode (or more like re-encode) the "fake" + // pair. The "fake" format doesn't do + // sign-extension, address conversion, or + // use big endian. (The use of little endian + // allows sharing the write32le() calls in + // the decoder to reduce code size when + // unaligned access isn't supported.) + uint32_t addr = inst & 0xFFFFF000; + addr += inst2 >> 20; + + inst = 0x17 | (2 << 7) | (inst2 << 12); + inst2 = addr; + } else { + // AUIPC's rd equals x0 or x2. + + // Check if inst matches the special format + // used by the encoder. + const uint32_t inst2_rs1 = inst >> 27; + + if (NOT_SPECIAL_AUIPC(inst, inst2_rs1)) { + i += 4 - 2; + continue; + } + + // Decode the "real" pair. + uint32_t addr = read32be(buffer + i + 4); + + addr -= now_pos + (uint32_t)i; + + // The second instruction: + // - Get the lowest 20 bits from inst. + // - Add the lowest 12 bits of the address + // as the immediate field. + inst2 = (inst >> 12) | (addr << 20); + + // AUIPC: + // - rd is the same as inst2_rs1. + // - The sign extension of the lowest 12 bits + // must be taken into account. + inst = 0x17 | (inst2_rs1 << 7) + | ((addr + 0x800) & 0xFFFFF000); + } + + // Both decoder branches write in little endian order. + write32le(buffer + i, inst); + write32le(buffer + i + 4, inst2); + + i += 8 - 2; + } + } + + return i; +} + + +extern lzma_ret +lzma_simple_riscv_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return lzma_simple_coder_init(next, allocator, filters, + &riscv_decode, 0, 8, 2, false); +} +#endif diff --git a/contrib/libs/lzma/liblzma/simple/simple_coder.c b/contrib/libs/lzma/liblzma/simple/simple_coder.c index ed2d7fb02c..5cbfa82270 100644 --- a/contrib/libs/lzma/liblzma/simple/simple_coder.c +++ b/contrib/libs/lzma/liblzma/simple/simple_coder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file simple_coder.c @@ -8,9 +10,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "simple_private.h" diff --git a/contrib/libs/lzma/liblzma/simple/simple_coder.h b/contrib/libs/lzma/liblzma/simple/simple_coder.h index 668a5092ad..2b762d5007 100644 --- a/contrib/libs/lzma/liblzma/simple/simple_coder.h +++ b/contrib/libs/lzma/liblzma/simple/simple_coder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file simple_coder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_SIMPLE_CODER_H @@ -62,12 +61,12 @@ extern lzma_ret lzma_simple_armthumb_decoder_init(lzma_next_coder *next, extern lzma_ret lzma_simple_arm64_encoder_init(lzma_next_coder *next, - const lzma_allocator *allocator, - const lzma_filter_info *filters); + const lzma_allocator *allocator, + const lzma_filter_info *filters); extern lzma_ret lzma_simple_arm64_decoder_init(lzma_next_coder *next, - const lzma_allocator *allocator, - const lzma_filter_info *filters); + const lzma_allocator *allocator, + const lzma_filter_info *filters); extern lzma_ret lzma_simple_sparc_encoder_init(lzma_next_coder *next, @@ -78,4 +77,13 @@ extern lzma_ret lzma_simple_sparc_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_riscv_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_riscv_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + #endif diff --git a/contrib/libs/lzma/liblzma/simple/simple_decoder.c b/contrib/libs/lzma/liblzma/simple/simple_decoder.c index dc4d241511..d9820ee8ed 100644 --- a/contrib/libs/lzma/liblzma/simple/simple_decoder.c +++ b/contrib/libs/lzma/liblzma/simple/simple_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file simple_decoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "simple_decoder.h" diff --git a/contrib/libs/lzma/liblzma/simple/simple_decoder.h b/contrib/libs/lzma/liblzma/simple/simple_decoder.h index bed8d37a96..2ae87bb863 100644 --- a/contrib/libs/lzma/liblzma/simple/simple_decoder.h +++ b/contrib/libs/lzma/liblzma/simple/simple_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file simple_decoder.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_SIMPLE_DECODER_H diff --git a/contrib/libs/lzma/liblzma/simple/simple_encoder.c b/contrib/libs/lzma/liblzma/simple/simple_encoder.c index d2cc03e58b..d1f35096e2 100644 --- a/contrib/libs/lzma/liblzma/simple/simple_encoder.c +++ b/contrib/libs/lzma/liblzma/simple/simple_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file simple_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "simple_encoder.h" diff --git a/contrib/libs/lzma/liblzma/simple/simple_encoder.h b/contrib/libs/lzma/liblzma/simple/simple_encoder.h index 1cee4823a4..bf5edbb1c3 100644 --- a/contrib/libs/lzma/liblzma/simple/simple_encoder.h +++ b/contrib/libs/lzma/liblzma/simple/simple_encoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file simple_encoder.c @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_SIMPLE_ENCODER_H diff --git a/contrib/libs/lzma/liblzma/simple/simple_private.h b/contrib/libs/lzma/liblzma/simple/simple_private.h index 9d2c0fdd76..7aa360ff49 100644 --- a/contrib/libs/lzma/liblzma/simple/simple_private.h +++ b/contrib/libs/lzma/liblzma/simple/simple_private.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file simple_private.h @@ -5,9 +7,6 @@ // // Author: Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_SIMPLE_PRIVATE_H diff --git a/contrib/libs/lzma/liblzma/simple/sparc.c b/contrib/libs/lzma/liblzma/simple/sparc.c index bad8492ebc..e8ad285a19 100644 --- a/contrib/libs/lzma/liblzma/simple/sparc.c +++ b/contrib/libs/lzma/liblzma/simple/sparc.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file sparc.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "simple_private.h" diff --git a/contrib/libs/lzma/liblzma/simple/x86.c b/contrib/libs/lzma/liblzma/simple/x86.c index 232b29542e..f216231f2d 100644 --- a/contrib/libs/lzma/liblzma/simple/x86.c +++ b/contrib/libs/lzma/liblzma/simple/x86.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file x86.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "simple_private.h" @@ -27,11 +26,7 @@ static size_t x86_code(void *simple_ptr, uint32_t now_pos, bool is_encoder, uint8_t *buffer, size_t size) { - static const bool MASK_TO_ALLOWED_STATUS[8] - = { true, true, true, false, true, false, false, false }; - - static const uint32_t MASK_TO_BIT_NUMBER[8] - = { 0, 1, 2, 2, 3, 3, 3, 3 }; + static const uint32_t MASK_TO_BIT_NUMBER[5] = { 0, 1, 2, 2, 3 }; lzma_simple_x86 *simple = simple_ptr; uint32_t prev_mask = simple->prev_mask; @@ -68,9 +63,8 @@ x86_code(void *simple_ptr, uint32_t now_pos, bool is_encoder, b = buffer[buffer_pos + 4]; - if (Test86MSByte(b) - && MASK_TO_ALLOWED_STATUS[(prev_mask >> 1) & 0x7] - && (prev_mask >> 1) < 0x10) { + if (Test86MSByte(b) && (prev_mask >> 1) <= 4 + && (prev_mask >> 1) != 3) { uint32_t src = ((uint32_t)(b) << 24) | ((uint32_t)(buffer[buffer_pos + 3]) << 16) diff --git a/contrib/libs/lzma/ya.make b/contrib/libs/lzma/ya.make index 0eeab01f5b..8e651471d5 100644 --- a/contrib/libs/lzma/ya.make +++ b/contrib/libs/lzma/ya.make @@ -3,15 +3,17 @@ LIBRARY() LICENSE( - MIT AND + 0BSD AND + BSD-2-Clause AND + BSD-3-Clause AND Public-Domain ) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(5.4.6) +VERSION(5.6.2) -ORIGINAL_SOURCE(https://github.com/tukaani-project/xz/archive/v5.4.6.tar.gz) +ORIGINAL_SOURCE(https://github.com/tukaani-project/xz/archive/v5.6.2.tar.gz) ADDINCL( GLOBAL contrib/libs/lzma/liblzma/api @@ -112,6 +114,7 @@ SRCS( liblzma/simple/armthumb.c liblzma/simple/ia64.c liblzma/simple/powerpc.c + liblzma/simple/riscv.c liblzma/simple/simple_coder.c liblzma/simple/simple_decoder.c liblzma/simple/simple_encoder.c |