author     shadchin <shadchin@yandex-team.com>  2023-01-05 21:33:07 +0300
committer  shadchin <shadchin@yandex-team.com>  2023-01-05 21:33:07 +0300
commit     97c1b4fc326f4a9435bc743e56681bb148b17c37 (patch)
tree       c7d8dd514d722f747f5c6584343aeeddd5d2f8be /contrib/libs/lzma
parent     d7b4c95518652104d51c915f0fd20fd3e954135c (diff)
Update contrib/libs/lzma to 5.4.0
Diffstat (limited to 'contrib/libs/lzma')
-rw-r--r--  contrib/libs/lzma/AUTHORS | 20
-rw-r--r--  contrib/libs/lzma/CMakeLists.darwin.txt | 7
-rw-r--r--  contrib/libs/lzma/CMakeLists.linux-aarch64.txt | 7
-rw-r--r--  contrib/libs/lzma/CMakeLists.linux.txt | 7
-rw-r--r--  contrib/libs/lzma/INSTALL | 76
-rw-r--r--  contrib/libs/lzma/NEWS | 877
-rw-r--r--  contrib/libs/lzma/README | 85
-rw-r--r--  contrib/libs/lzma/common/mythread.h | 7
-rw-r--r--  contrib/libs/lzma/common/tuklib_common.h | 2
-rw-r--r--  contrib/libs/lzma/common/tuklib_config.h | 3
-rw-r--r--  contrib/libs/lzma/common/tuklib_cpucores.c | 9
-rw-r--r--  contrib/libs/lzma/common/tuklib_integer.h | 46
-rw-r--r--  contrib/libs/lzma/common/tuklib_physmem.c | 2
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma.h | 3
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/base.h | 57
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/bcj.h | 6
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/block.h | 3
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/container.h | 304
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/filter.h | 316
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/hardware.h | 2
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/index.h | 66
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/index_hash.h | 2
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/lzma12.h | 123
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/version.h | 4
-rw-r--r--  contrib/libs/lzma/liblzma/api/lzma/vli.h | 2
-rw-r--r--  contrib/libs/lzma/liblzma/check/crc64_fast.c | 454
-rw-r--r--  contrib/libs/lzma/liblzma/check/crc64_table.c | 21
-rw-r--r--  contrib/libs/lzma/liblzma/common/alone_decoder.c | 23
-rw-r--r--  contrib/libs/lzma/liblzma/common/alone_encoder.c | 1
-rw-r--r--  contrib/libs/lzma/liblzma/common/auto_decoder.c | 27
-rw-r--r--  contrib/libs/lzma/liblzma/common/block_buffer_encoder.c | 18
-rw-r--r--  contrib/libs/lzma/liblzma/common/block_decoder.c | 79
-rw-r--r--  contrib/libs/lzma/liblzma/common/block_encoder.c | 1
-rw-r--r--  contrib/libs/lzma/liblzma/common/block_header_decoder.c | 29
-rw-r--r--  contrib/libs/lzma/liblzma/common/common.c | 30
-rw-r--r--  contrib/libs/lzma/liblzma/common/common.h | 87
-rw-r--r--  contrib/libs/lzma/liblzma/common/file_info.c | 855
-rw-r--r--  contrib/libs/lzma/liblzma/common/filter_common.c | 68
-rw-r--r--  contrib/libs/lzma/liblzma/common/filter_common.h | 3
-rw-r--r--  contrib/libs/lzma/liblzma/common/filter_decoder.c | 14
-rw-r--r--  contrib/libs/lzma/liblzma/common/filter_encoder.c | 23
-rw-r--r--  contrib/libs/lzma/liblzma/common/hardware_cputhreads.c | 12
-rw-r--r--  contrib/libs/lzma/liblzma/common/index.c | 16
-rw-r--r--  contrib/libs/lzma/liblzma/common/index_decoder.c | 15
-rw-r--r--  contrib/libs/lzma/liblzma/common/index_decoder.h | 24
-rw-r--r--  contrib/libs/lzma/liblzma/common/index_hash.c | 16
-rw-r--r--  contrib/libs/lzma/liblzma/common/lzip_decoder.c | 414
-rw-r--r--  contrib/libs/lzma/liblzma/common/lzip_decoder.h | 22
-rw-r--r--  contrib/libs/lzma/liblzma/common/memcmplen.h | 12
-rw-r--r--  contrib/libs/lzma/liblzma/common/microlzma_decoder.c | 221
-rw-r--r--  contrib/libs/lzma/liblzma/common/microlzma_encoder.c | 140
-rw-r--r--  contrib/libs/lzma/liblzma/common/outqueue.c | 299
-rw-r--r--  contrib/libs/lzma/liblzma/common/outqueue.h | 176
-rw-r--r--  contrib/libs/lzma/liblzma/common/stream_decoder.c | 32
-rw-r--r--  contrib/libs/lzma/liblzma/common/stream_decoder_mt.c | 2016
-rw-r--r--  contrib/libs/lzma/liblzma/common/stream_encoder.c | 41
-rw-r--r--  contrib/libs/lzma/liblzma/common/stream_encoder_mt.c | 212
-rw-r--r--  contrib/libs/lzma/liblzma/common/stream_flags_decoder.c | 10
-rw-r--r--  contrib/libs/lzma/liblzma/common/string_conversion.c | 1317
-rw-r--r--  contrib/libs/lzma/liblzma/lz/lz_decoder.c | 13
-rw-r--r--  contrib/libs/lzma/liblzma/lz/lz_decoder.h | 12
-rw-r--r--  contrib/libs/lzma/liblzma/lz/lz_encoder.c | 69
-rw-r--r--  contrib/libs/lzma/liblzma/lz/lz_encoder.h | 16
-rw-r--r--  contrib/libs/lzma/liblzma/lzma/lzma2_decoder.c | 5
-rw-r--r--  contrib/libs/lzma/liblzma/lzma/lzma2_encoder.c | 8
-rw-r--r--  contrib/libs/lzma/liblzma/lzma/lzma_decoder.c | 133
-rw-r--r--  contrib/libs/lzma/liblzma/lzma/lzma_decoder.h | 2
-rw-r--r--  contrib/libs/lzma/liblzma/lzma/lzma_encoder.c | 201
-rw-r--r--  contrib/libs/lzma/liblzma/lzma/lzma_encoder.h | 3
-rw-r--r--  contrib/libs/lzma/liblzma/lzma/lzma_encoder_private.h | 15
-rw-r--r--  contrib/libs/lzma/liblzma/rangecoder/range_encoder.h | 123
-rw-r--r--  contrib/libs/lzma/liblzma/simple/arm.c | 4
-rw-r--r--  contrib/libs/lzma/liblzma/simple/arm64.c | 136
-rw-r--r--  contrib/libs/lzma/liblzma/simple/armthumb.c | 4
-rw-r--r--  contrib/libs/lzma/liblzma/simple/ia64.c | 4
-rw-r--r--  contrib/libs/lzma/liblzma/simple/powerpc.c | 4
-rw-r--r--  contrib/libs/lzma/liblzma/simple/simple_coder.h | 9
-rw-r--r--  contrib/libs/lzma/liblzma/simple/sparc.c | 4
-rw-r--r--  contrib/libs/lzma/liblzma/simple/x86.c | 4
79 files changed, 9001 insertions, 532 deletions
diff --git a/contrib/libs/lzma/AUTHORS b/contrib/libs/lzma/AUTHORS
index bda87975a5..69bbfc3ef6 100644
--- a/contrib/libs/lzma/AUTHORS
+++ b/contrib/libs/lzma/AUTHORS
@@ -3,15 +3,15 @@ Authors of XZ Utils
===================
XZ Utils is developed and maintained by Lasse Collin
- <lasse.collin@tukaani.org>.
+ <lasse.collin@tukaani.org> and Jia Tan <jiat0218@gmail.com>.
Major parts of liblzma are based on code written by Igor Pavlov,
- specifically the LZMA SDK <http://7-zip.org/sdk.html>. Without
+ specifically the LZMA SDK <https://7-zip.org/sdk.html>. Without
this code, XZ Utils wouldn't exist.
The SHA-256 implementation in liblzma is based on the code found from
- 7-Zip <http://7-zip.org/>, which has a modified version of the SHA-256
- code found from Crypto++ <http://www.cryptopp.com/>. The SHA-256 code
+ 7-Zip <https://7-zip.org/>, which has a modified version of the SHA-256
+ code found from Crypto++ <https://www.cryptopp.com/>. The SHA-256 code
in Crypto++ was written by Kevin Springle and Wei Dai.
Some scripts have been adapted from gzip. The original versions
@@ -19,6 +19,18 @@ Authors of XZ Utils
Andrew Dudman helped adapting the scripts and their man pages for
XZ Utils.
+ The initial version of the threaded .xz decompressor was written
+ by Sebastian Andrzej Siewior.
+
+ The initial version of the .lz (lzip) decoder was written
+ by Michał Górny.
+
+ CLMUL-accelerated CRC code was contributed by Ilya Kurdyukov.
+
+ Other authors:
+ - Jonathan Nieder
+ - Joachim Henke
+
The GNU Autotools-based build system contains files from many authors,
which I'm not trying to list here.
diff --git a/contrib/libs/lzma/CMakeLists.darwin.txt b/contrib/libs/lzma/CMakeLists.darwin.txt
index 156016b68a..bb12c2a6ad 100644
--- a/contrib/libs/lzma/CMakeLists.darwin.txt
+++ b/contrib/libs/lzma/CMakeLists.darwin.txt
@@ -55,6 +55,7 @@ target_sources(contrib-libs-lzma PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/easy_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/easy_encoder_memusage.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/easy_preset.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/file_info.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/filter_buffer_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/filter_buffer_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/filter_common.c
@@ -68,15 +69,20 @@ target_sources(contrib-libs-lzma PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/index_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/index_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/index_hash.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/lzip_decoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/microlzma_decoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/microlzma_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/outqueue.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_buffer_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_buffer_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_decoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_decoder_mt.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_flags_common.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_flags_encoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/string_conversion.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/vli_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/vli_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/vli_size.c
@@ -96,6 +102,7 @@ target_sources(contrib-libs-lzma PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/lzma/lzma_encoder_presets.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/rangecoder/price_table.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/arm.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/arm64.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/armthumb.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/ia64.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/powerpc.c
diff --git a/contrib/libs/lzma/CMakeLists.linux-aarch64.txt b/contrib/libs/lzma/CMakeLists.linux-aarch64.txt
index c9f72e9ce0..8f0fef4c35 100644
--- a/contrib/libs/lzma/CMakeLists.linux-aarch64.txt
+++ b/contrib/libs/lzma/CMakeLists.linux-aarch64.txt
@@ -58,6 +58,7 @@ target_sources(contrib-libs-lzma PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/easy_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/easy_encoder_memusage.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/easy_preset.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/file_info.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/filter_buffer_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/filter_buffer_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/filter_common.c
@@ -71,15 +72,20 @@ target_sources(contrib-libs-lzma PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/index_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/index_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/index_hash.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/lzip_decoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/microlzma_decoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/microlzma_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/outqueue.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_buffer_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_buffer_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_decoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_decoder_mt.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_flags_common.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_flags_encoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/string_conversion.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/vli_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/vli_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/vli_size.c
@@ -99,6 +105,7 @@ target_sources(contrib-libs-lzma PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/lzma/lzma_encoder_presets.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/rangecoder/price_table.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/arm.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/arm64.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/armthumb.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/ia64.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/powerpc.c
diff --git a/contrib/libs/lzma/CMakeLists.linux.txt b/contrib/libs/lzma/CMakeLists.linux.txt
index c9f72e9ce0..8f0fef4c35 100644
--- a/contrib/libs/lzma/CMakeLists.linux.txt
+++ b/contrib/libs/lzma/CMakeLists.linux.txt
@@ -58,6 +58,7 @@ target_sources(contrib-libs-lzma PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/easy_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/easy_encoder_memusage.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/easy_preset.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/file_info.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/filter_buffer_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/filter_buffer_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/filter_common.c
@@ -71,15 +72,20 @@ target_sources(contrib-libs-lzma PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/index_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/index_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/index_hash.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/lzip_decoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/microlzma_decoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/microlzma_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/outqueue.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_buffer_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_buffer_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_decoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_decoder_mt.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_flags_common.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/stream_flags_encoder.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/string_conversion.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/vli_decoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/vli_encoder.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/common/vli_size.c
@@ -99,6 +105,7 @@ target_sources(contrib-libs-lzma PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/lzma/lzma_encoder_presets.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/rangecoder/price_table.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/arm.c
+ ${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/arm64.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/armthumb.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/ia64.c
${CMAKE_SOURCE_DIR}/contrib/libs/lzma/liblzma/simple/powerpc.c
diff --git a/contrib/libs/lzma/INSTALL b/contrib/libs/lzma/INSTALL
index a55affb03d..67df18803f 100644
--- a/contrib/libs/lzma/INSTALL
+++ b/contrib/libs/lzma/INSTALL
@@ -142,12 +142,10 @@ XZ Utils Installation
If it is enough to build liblzma (no command line tools):
- - There is experimental CMake support. As it is, it should be
- good enough to build static liblzma with Visual Studio.
- Building liblzma.dll might work too (if it doesn't, it should
- be fixed). The CMake support may work with MinGW or MinGW-w64.
- Read the comment in the beginning of CMakeLists.txt before
- running CMake!
+ - There is CMake support. It should be good enough to build
+ static liblzma or liblzma.dll with Visual Studio. The CMake
+ support may work with MinGW or MinGW-w64. Read the comment
+ in the beginning of CMakeLists.txt before running CMake!
- There are Visual Studio project files under the "windows"
directory. See windows/INSTALL-MSVC.txt. In the future the
@@ -316,6 +314,18 @@ XZ Utils Installation
| xz -v -0 -Csha256 > foo.xz
time xz --test foo.xz
+ --disable-microlzma
+ Don't build MicroLZMA encoder and decoder. This omits
+ lzma_microlzma_encoder() and lzma_microlzma_decoder()
+ API functions from liblzma. These functions are needed
+ by specific applications only. They were written for
+ erofs-utils but they may be used by others too.
+
+ --disable-lzip-decoder
+ Disable decompression support for .lz (lzip) files.
+ This omits the API function lzma_lzip_decoder() from
+ liblzma and .lz support from the xz tool.
+
--disable-xz
--disable-xzdec
--disable-lzmadec
@@ -358,16 +368,45 @@ XZ Utils Installation
pre-i686 systems, you may want to disable the assembler
code.
+ --disable-clmul-crc
+ Disable the use of carryless multiplication for CRC
+ calculation even if compiler support for it is detected.
+ The code uses runtime detection of SSSE3, SSE4.1, and
+ CLMUL instructions on x86. On 32-bit x86 this currently
+ is used only if --disable-assembler is used (this might
+ be fixed in the future). The code works on E2K too.
+
+ If using compiler options that unconditionally allow the
+ required extensions (-msse4.1 -mpclmul) then runtime
+ detection isn't used and the generic code is omitted.
+
--enable-unaligned-access
- Allow liblzma to use unaligned memory access for 16-bit
- and 32-bit loads and stores. This should be enabled only
- when the hardware supports this, i.e. when unaligned
- access is fast. Some operating system kernels emulate
- unaligned access, which is extremely slow. This option
- shouldn't be used on systems that rely on such emulation.
+ Allow liblzma to use unaligned memory access for 16-bit,
+ 32-bit, and 64-bit loads and stores. This should be
+ enabled only when the hardware supports this, that is,
+ when unaligned access is fast. Some operating system
+ kernels emulate unaligned access, which is extremely
+ slow. This option shouldn't be used on systems that
+ rely on such emulation.
Unaligned access is enabled by default on x86, x86-64,
- and big endian PowerPC.
+ big endian PowerPC, some ARM, and some ARM64 systems.
+
+ --enable-unsafe-type-punning
+ This enables use of code like
+
+ uint8_t *buf8 = ...;
+ *(uint32_t *)buf8 = ...;
+
+ which violates strict aliasing rules and may result
+ in broken code. There should be no need to use this
+ option with recent GCC or Clang versions on any
+ arch as just as fast code can be generated in a safe
+ way too (using __builtin_assume_aligned + memcpy).
+
+ However, this option might improve performance in some
+ other cases, especially with old compilers (for example,
+ GCC 3 and early 4.x on x86, GCC < 6 on ARMv6 and ARMv7).
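
A hedged sketch of the safe pattern referred to above; the helper name
write32ne is only illustrative (liblzma ships its own tuklib helpers for
this), and the snippet is not taken from the upstream sources:

    #include <stdint.h>
    #include <string.h>

    static inline void write32ne(uint8_t *buf8, uint32_t value)
    {
            /* memcpy() instead of *(uint32_t *)buf8 = value; this avoids
               the strict aliasing violation, and modern GCC/Clang compile
               it to a single (possibly unaligned) store where the target
               architecture allows it. */
            memcpy(buf8, &value, sizeof(value));
    }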
--enable-small
Reduce the size of liblzma by selecting smaller but
@@ -427,7 +466,9 @@ XZ Utils Installation
no Disable threading support. This is the
same as using --disable-threads.
- NOTE: If combined with --enable-small, the
+ NOTE: If combined with --enable-small
+ and the compiler doesn't support
+ __attribute__((__constructor__)), the
resulting liblzma won't be thread safe,
that is, if a multi-threaded application
calls any liblzma functions from more than
@@ -447,6 +488,7 @@ XZ Utils Installation
auto Look for a supported sandboxing method
and use it if found. If no method is
found, then sandboxing isn't used.
+ This is the default.
no Disable sandboxing support.
@@ -455,6 +497,10 @@ XZ Utils Installation
sandboxing. If no Capsicum support
is found, configure will give an error.
+ pledge Use pledge(2) (OpenBSD >= 5.9) for
+ sandboxing. If pledge(2) isn't found,
+ configure will give an error.
+
--enable-symbol-versions
Use symbol versioning for liblzma. This is enabled by
default on GNU/Linux, other GNU-based systems, and
@@ -517,7 +563,7 @@ XZ Utils Installation
liblzma, pass --enable-small to configure.
- Tell the compiler to optimize for size instead of speed.
- E.g. with GCC, put -Os into CFLAGS.
+ For example, with GCC, put -Os into CFLAGS.
- xzdec and lzmadec will never use multithreading capabilities of
liblzma. You can avoid dependency on libpthread by passing
diff --git a/contrib/libs/lzma/NEWS b/contrib/libs/lzma/NEWS
index d3f7d60234..feae8a465e 100644
--- a/contrib/libs/lzma/NEWS
+++ b/contrib/libs/lzma/NEWS
@@ -2,6 +2,883 @@
XZ Utils Release Notes
======================
+5.4.0 (2022-12-13)
+
+ This bumps the minor version of liblzma because new features were
+ added. The API and ABI are still backward compatible with liblzma
+ 5.2.x and 5.0.x.
+
+ Since 5.3.5beta:
+
+ * All fixes from 5.2.10.
+
+ * The ARM64 filter is now stable. The xz option is now --arm64.
+ Decompression requires XZ Utils 5.4.0. In the future the ARM64
+ filter will be supported by XZ for Java, XZ Embedded (including
+ the version in Linux), LZMA SDK, and 7-Zip.
+
+ * Translations:
+
+ - Updated Catalan, Croatian, German, Romanian, and Turkish
+ translations.
+
+ - Updated German man page translations.
+
+ - Added Romanian man page translations.
+
+ Summary of new features added in the 5.3.x development releases:
+
+ * liblzma:
+
+ - Added threaded .xz decompressor lzma_stream_decoder_mt().
+ It can use multiple threads with .xz files that have multiple
+ Blocks with size information in Block Headers. The threaded
+ encoder in xz has always created such files.
+
+ Single-threaded encoder cannot store the size information in
+ Block Headers even if one used LZMA_FULL_FLUSH to create
+ multiple Blocks, so this threaded decoder cannot use multiple
+ threads with such files.
+
+ If there are multiple Streams (concatenated .xz files), one
+ Stream will be decompressed completely before starting the
+ next Stream.
+
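To make the description concrete, here is a minimal, hedged sketch of
initializing the threaded decoder; the thread count and memory limits are
illustrative values only, not recommendations:

    /* Sketch only; the lzma_code() loop and error handling are omitted. */
    #include <lzma.h>

    static lzma_ret init_threaded_xz_decoder(lzma_stream *strm)
    {
            lzma_mt mt = {
                    .flags = LZMA_CONCATENATED,     /* LZMA_FAIL_FAST (below) may be added */
                    .threads = 4,                   /* worker thread count */
                    .timeout = 0,
                    .memlimit_threading = 1U << 30, /* soft limit: may reduce threads */
                    .memlimit_stop = UINT64_MAX,    /* hard limit: effectively unlimited here */
            };
            return lzma_stream_decoder_mt(strm, &mt);
    }
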
+ - A new decoder flag LZMA_FAIL_FAST was added. It makes the
+ threaded decompressor report errors soon instead of first
+ flushing all pending data before the error location.
+
+ - New Filter IDs:
+ * LZMA_FILTER_ARM64 is for ARM64 binaries.
+ * LZMA_FILTER_LZMA1EXT is for raw LZMA1 streams that don't
+ necessarily use the end marker.
+
+ - Added lzma_str_to_filters(), lzma_str_from_filters(), and
+ lzma_str_list_filters() to convert a preset or a filter chain
+ string to a lzma_filter[] and vice versa. These should make
+ it easier to write applications that allow users to specify
+ custom compression options.
+
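A minimal sketch of how an application might use these functions; passing 0
for the flags and NULL for the allocator are the simplest assumed choices:

    /* Sketch only; spec comes from the user, e.g. a preset or filter chain. */
    #include <lzma.h>
    #include <stdio.h>

    static int chain_from_string(const char *spec)
    {
            lzma_filter filters[LZMA_FILTERS_MAX + 1];
            int error_pos;

            /* NULL is returned on success, otherwise an error message. */
            const char *msg = lzma_str_to_filters(spec, &error_pos, filters, 0, NULL);
            if (msg != NULL) {
                    fprintf(stderr, "invalid filter string at %d: %s\n", error_pos, msg);
                    return 1;
            }

            /* ... pass filters[] to an encoder initialization function ... */

            lzma_filters_free(filters, NULL); /* free the allocated filter options */
            return 0;
    }
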
+ - Added lzma_filters_free() which can be convenient for freeing
+ the filter options in a filter chain (an array of lzma_filter
+ structures).
+
+ - lzma_file_info_decoder() makes it a little easier to get
+ the Index field from .xz files. This helps in getting the
+ uncompressed file size, but an easy-to-use random access API
+ (which has existed in XZ for Java for a long time) is still
+ missing from liblzma.
+
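A hedged sketch of the intended use; the helper name start_file_info_decode()
is hypothetical, and the seek/feed loop around lzma_code() is only summarized
in the comment:

    #include <lzma.h>

    static lzma_ret start_file_info_decode(lzma_stream *strm,
                    lzma_index **idx, uint64_t file_size)
    {
            /* Feed the file with lzma_code(); the decoder may return
               LZMA_SEEK_NEEDED and set strm->seek_pos, in which case the
               application seeks and continues. On LZMA_STREAM_END, *idx is
               ready and lzma_index_uncompressed_size(*idx) gives the size. */
            return lzma_file_info_decoder(strm, idx, UINT64_MAX, file_size);
    }
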
+ - Added lzma_microlzma_encoder() and lzma_microlzma_decoder().
+ It is used by erofs-utils and may be used by others too.
+
+ The MicroLZMA format is a raw LZMA stream (without end marker)
+ whose first byte (always 0x00) has been replaced with
+ bitwise-negation of the LZMA properties (lc/lp/pb). It was
+ created for use in EROFS but may be used in other contexts
+ as well where it is important to avoid wasting bytes for
+ stream headers or footers. The format is also supported by
+ XZ Embedded (the XZ Embedded version in Linux got MicroLZMA
+ support in Linux 5.16).
+
+ The MicroLZMA encoder API in liblzma can compress into a
+ fixed-sized output buffer so that as much data is compressed
+ as can be fit into the buffer while still creating a valid
+ MicroLZMA stream. This is needed for EROFS.
+
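The property-byte replacement can be written out directly from the description
above; this sketch assumes the standard LZMA properties encoding
props = (pb * 5 + lp) * 9 + lc and is not code copied from liblzma:

    #include <stdint.h>

    static uint8_t microlzma_first_byte(uint32_t lc, uint32_t lp, uint32_t pb)
    {
            const uint8_t props = (uint8_t)((pb * 5 + lp) * 9 + lc);
            return (uint8_t)~props; /* replaces the 0x00 that starts a raw LZMA1 stream */
    }
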
+ - Added lzma_lzip_decoder() to decompress the .lz (lzip) file
+ format version 0 and the original unextended version 1 files.
+ Also lzma_auto_decoder() supports .lz files.
+
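A minimal sketch of an auto-detecting decoder setup; UINT64_MAX as the memory
limit and the LZMA_CONCATENATED flag are illustrative choices:

    #include <lzma.h>

    static lzma_ret init_auto_decoder(lzma_stream *strm)
    {
            /* Detects .xz, .lzma and (when the lzip decoder is built in)
               .lz input from the first bytes of the stream. */
            return lzma_auto_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
    }
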
+ - lzma_filters_update() can now be used with the multi-threaded
+ encoder (lzma_stream_encoder_mt()) to change the filter chain
+ after LZMA_FULL_BARRIER or LZMA_FULL_FLUSH.
+
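A hedged sketch of a mid-stream filter switch; the helper name switch_filters()
is hypothetical, new_filters is assumed to be a prepared array terminated with
LZMA_VLI_UNKNOWN, and real code must also provide and drain the output buffer
while the barrier completes:

    #include <lzma.h>

    static lzma_ret switch_filters(lzma_stream *strm, const lzma_filter *new_filters)
    {
            lzma_ret r;
            do {
                    r = lzma_code(strm, LZMA_FULL_BARRIER); /* finish the current Block */
            } while (r == LZMA_OK);

            if (r != LZMA_STREAM_END)
                    return r;

            /* The new chain applies to Blocks encoded after the barrier. */
            return lzma_filters_update(strm, new_filters);
    }
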
+ - In lzma_options_lzma, allow nice_len = 2 and 3 with the match
+ finders that require at least 3 or 4. Now it is internally
+ rounded up if needed.
+
+ - CLMUL-based CRC64 on x86-64 and E2K with runtime processor
+ detection. On 32-bit x86 it currently isn't available unless
+ --disable-assembler is used which can make the non-CLMUL
+ CRC64 slower; this might be fixed in the future.
+
+ - Building with --disable-threads --enable-small
+ is now thread-safe if the compiler supports
+ __attribute__((__constructor__)).
+
+ * xz:
+
+ - Using -T0 (--threads=0) will now use multi-threaded encoder
+ even on a single-core system. This is to ensure that output
+ from the same xz binary is identical on both single-core and
+ multi-core systems.
+
+ - --threads=+1 or -T+1 is now a way to put xz into
+ multi-threaded mode while using only one worker thread.
+ The + is ignored if the number is not 1.
+
+ - A default soft memory usage limit is now used for compression
+ when -T0 is used and no explicit limit has been specified.
+ This soft limit is used to restrict the number of threads
+ but if the limit is exceeded with even one thread then xz
+ will continue with one thread using the multi-threaded
+ encoder and this limit is ignored. If the number of threads
+ is specified manually then no default limit will be used;
+ this affects only -T0.
+
+ This change helps on systems that have very many cores and
+ using all of them for xz makes no sense. Previously xz -T0
+ could run out of memory on such systems because it attempted
+ to reserve memory for too many threads.
+
+ This also helps with 32-bit builds which don't have a large
+ amount of address space that would be required for many
+ threads. The default soft limit for -T0 is at most 1400 MiB
+ on all 32-bit platforms.
+
+ - Previously a low value in --memlimit-compress wouldn't cause
+ xz to switch from multi-threaded mode to single-threaded mode
+ if the limit cannot otherwise be met; xz failed instead. Now
+ xz can switch to single-threaded mode and then, if needed,
+ scale down the LZMA2 dictionary size too just like it already
+ did when it was started in single-threaded mode.
+
+ - The option --no-adjust no longer prevents xz from scaling down
+ the number of threads as that doesn't affect the compressed
+ output (only performance). Now --no-adjust only prevents
+ adjustments that affect compressed output, that is, with
+ --no-adjust xz won't switch from multi-threaded mode to
+ single-threaded mode and won't scale down the LZMA2
+ dictionary size.
+
+ - Added a new option --memlimit-mt-decompress=LIMIT. This is
+ used to limit the number of decompressor threads (possibly
+ falling back to single-threaded mode) but it will never make
+ xz refuse to decompress a file. This has a system-specific
+ default value because without any limit xz could end up
+ allocating memory for the whole compressed input file, the
+ whole uncompressed output file, multiple thread-specific
+ decompressor instances and so on. Basically xz could
+ attempt to use an insane amount of memory even with fairly
+ common files. The system-specific default value is currently
+ the same as the one used for compression with -T0.
+
+ The new option works together with the existing option
+ --memlimit-decompress=LIMIT. The old option sets a hard limit
+ that must not be exceeded (xz will refuse to decompress)
+ while the new option only restricts the number of threads.
+ If the limit set with --memlimit-mt-decompress is greater
+ than the limit set with --memlimit-decompress, then the latter
+ value is used also for --memlimit-mt-decompress.
+
+ - Added new information to the output of xz --info-memory and
+ new fields to the output of xz --robot --info-memory.
+
+ - In --lzma2=nice=NUMBER allow 2 and 3 with all match finders
+ now that liblzma handles it.
+
+ - Don't mention endianness for ARM and ARM-Thumb filters in
+ --long-help. The filters only work for little endian
+ instruction encoding but modern ARM processors using
+ big endian data access still use little endian
+ instruction encoding. So the help text was misleading.
+ In contrast, the PowerPC filter is only for big endian
+ 32/64-bit PowerPC code. Little endian PowerPC would need
+ a separate filter.
+
+ - Added decompression support for the .lz (lzip) file format
+ version 0 and the original unextended version 1. It is
+ autodetected by default. See also the option --format on
+ the xz man page.
+
+ - Sandboxing enabled by default:
+ * Capsicum (FreeBSD)
+ * pledge(2) (OpenBSD)
+
+ * Scripts now support the .lz format using xz.
+
+ * A few new tests were added.
+
+ * The liblzma-specific tests are now supported in CMake-based
+ builds too ("make test").
+
+
+5.3.5beta (2022-12-01)
+
+ * All fixes from 5.2.9.
+
+ * liblzma:
+
+ - Added new LZMA_FILTER_LZMA1EXT for raw encoder and decoder to
+ handle raw LZMA1 streams that don't have end of payload marker
+ (EOPM) alias end of stream (EOS) marker. It can be used in
+ filter chains, for example, with the x86 BCJ filter.
+
+ - Added lzma_str_to_filters(), lzma_str_from_filters(), and
+ lzma_str_list_filters() to make it easier for applications
+ to get custom compression options from a user and convert
+ it to an array of lzma_filter structures.
+
+ - Added lzma_filters_free().
+
+ - lzma_filters_update() can now be used with the multi-threaded
+ encoder (lzma_stream_encoder_mt()) to change the filter chain
+ after LZMA_FULL_BARRIER or LZMA_FULL_FLUSH.
+
+ - In lzma_options_lzma, allow nice_len = 2 and 3 with the match
+ finders that require at least 3 or 4. Now it is internally
+ rounded up if needed.
+
+ - ARM64 filter was modified. It is still experimental.
+
+ - Fixed LTO build with Clang if -fgnuc-version=10 or similar
+ was used to make Clang look like GCC >= 10. Now it uses
+ __has_attribute(__symver__) which should be reliable.
+
+ * xz:
+
+ - --threads=+1 or -T+1 is now a way to put xz into multi-threaded
+ mode while using only one worker thread.
+
+ - In --lzma2=nice=NUMBER allow 2 and 3 with all match finders
+ now that liblzma handles it.
+
+ * Updated translations: Chinese (simplified), Korean, and Turkish.
+
+
+5.3.4alpha (2022-11-15)
+
+ * All fixes from 5.2.7 and 5.2.8.
+
+ * liblzma:
+
+ - Minor improvements to the threaded decoder.
+
+ - Added CRC64 implementation that uses SSSE3, SSE4.1, and CLMUL
+ instructions on 32/64-bit x86 and E2K. On 32-bit x86 it's
+ not enabled unless --disable-assembler is used but then
+ the non-CLMUL code might be slower. Processor support is
+ detected at runtime so this is built by default on x86-64
+ and E2K. On these platforms, if compiler flags indicate
+ unconditional CLMUL support (-msse4.1 -mpclmul) then the
+ generic version is not built, making liblzma 8-9 KiB smaller
+ compared to having both versions included.
+
+ With extremely compressible files this can make decompression
+ up to twice as fast but with typical files 5 % improvement
+ is a more realistic expectation.
+
+ The CLMUL version is slower than the generic version with
+ tiny inputs (especially at 1-8 bytes per call, but up to
+ 16 bytes). In normal use in xz this doesn't matter at all.
+
+ - Added an experimental ARM64 filter. This is *not* the final
+ version! Files created with this experimental version won't
+ be supported in the future versions! The filter design is
+ a compromise where improving one use case makes some other
+ cases worse.
+
+ - Added decompression support for the .lz (lzip) file format
+ version 0 and the original unextended version 1. See the
+ API docs of lzma_lzip_decoder() for details. Also
+ lzma_auto_decoder() supports .lz files.
+
+ - Building with --disable-threads --enable-small
+ is now thread-safe if the compiler supports
+ __attribute__((__constructor__))
+
+ * xz:
+
+ - Added support for OpenBSD's pledge(2) as a sandboxing method.
+
+ - Don't mention endianness for ARM and ARM-Thumb filters in
+ --long-help. The filters only work for little endian
+ instruction encoding but modern ARM processors using
+ big endian data access still use little endian
+ instruction encoding. So the help text was misleading.
+ In contrast, the PowerPC filter is only for big endian
+ 32/64-bit PowerPC code. Little endian PowerPC would need
+ a separate filter.
+
+ - Added --experimental-arm64. This will be renamed once the
+ filter is finished. Files created with this experimental
+ filter will not be supported in the future!
+
+ - Added new fields to the output of xz --robot --info-memory.
+
+ - Added decompression support for the .lz (lzip) file format
+ version 0 and the original unextended version 1. It is
+ autodetected by default. See also the option --format on
+ the xz man page.
+
+ * Scripts now support the .lz format using xz.
+
+ * Build systems:
+
+ - New #defines in config.h: HAVE_ENCODER_ARM64,
+ HAVE_DECODER_ARM64, HAVE_LZIP_DECODER, HAVE_CPUID_H,
+ HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR, HAVE_USABLE_CLMUL
+
+ - New configure options: --disable-clmul-crc,
+ --disable-microlzma, --disable-lzip-decoder, and
+ 'pledge' is now an option in --enable-sandbox (but
+ it's autodetected by default anyway).
+
+ - INSTALL was updated to document the new configure options.
+
+ - PACKAGERS now lists also --disable-microlzma and
+ --disable-lzip-decoder as configure options that must
+ not be used in builds for non-embedded use.
+
+ * Tests:
+
+ - Fix some of the tests so that they skip instead of fail if
+ certain features have been disabled with configure options.
+ It's still not perfect.
+
+ - Other improvements to tests.
+
+ * Updated translations: Croatian, Finnish, Hungarian, Polish,
+ Romanian, Spanish, Swedish, and Ukrainian.
+
+
+5.3.3alpha (2022-08-22)
+
+ * All fixes from 5.2.6.
+
+ * liblzma:
+
+ - Fixed 32-bit build.
+
+ - Added threaded .xz decompressor lzma_stream_decoder_mt().
+ It can use multiple threads with .xz files that have multiple
+ Blocks with size information in Block Headers. The threaded
+ encoder in xz has always created such files.
+
+ Single-threaded encoder cannot store the size information in
+ Block Headers even if one used LZMA_FULL_FLUSH to create
+ multiple Blocks, so this threaded decoder cannot use multiple
+ threads with such files.
+
+ If there are multiple Streams (concatenated .xz files), one
+ Stream will be decompressed completely before starting the
+ next Stream.
+
+ - A new decoder flag LZMA_FAIL_FAST was added. It makes the
+ threaded decompressor report errors soon instead of first
+ flushing all pending data before the error location.
+
+ * xz:
+
+ - Using -T0 (--threads=0) will now use multi-threaded encoder
+ even on a single-core system. This is to ensure that output
+ from the same xz binary is identical on both single-core and
+ multi-core systems.
+
+ - A default soft memory usage limit is now used for compression
+ when -T0 is used and no explicit limit has been specified.
+ This soft limit is used to restrict the number of threads
+ but if the limit is exceeded with even one thread then xz
+ will continue with one thread using the multi-threaded
+ encoder and this limit is ignored. If the number of threads
+ is specified manually then no default limit will be used;
+ this affects only -T0.
+
+ This change helps on systems that have very many cores and
+ using all of them for xz makes no sense. Previously xz -T0
+ could run out of memory on such systems because it attempted
+ to reserve memory for too many threads.
+
+ This also helps with 32-bit builds which don't have a large
+ amount of address space that would be required for many
+ threads. The default limit is 1400 MiB on all 32-bit
+ platforms with -T0.
+
+ Now xz -T0 should just work. It might use too few threads
+ in some cases but at least it shouldn't easily run out of
+ memory. It's possible that this will be tweaked before 5.4.0.
+
+ - Changes to --memlimit-compress and --no-adjust:
+
+ In single-threaded mode, --memlimit-compress can make xz
+ scale down the LZMA2 dictionary size to meet the memory usage
+ limit. This obviously affects the compressed output. However,
+ if xz was in threaded mode, --memlimit-compress could make xz
+ reduce the number of threads but it wouldn't make xz switch
+ from multi-threaded mode to single-threaded mode or scale
+ down the LZMA2 dictionary size. This seemed illogical.
+
+ Now --memlimit-compress can make xz switch to single-threaded
+ mode if one thread in multi-threaded mode uses too much
+ memory. If memory usage is still too high, then the LZMA2
+ dictionary size can be scaled down too.
+
+ The option --no-adjust was also changed so that it no longer
+ prevents xz from scaling down the number of threads as that
+ doesn't affect compressed output (only performance). After
+ this commit --no-adjust only prevents adjustments that affect
+ compressed output, that is, with --no-adjust xz won't switch
+ from multithreaded mode to single-threaded mode and won't
+ scale down the LZMA2 dictionary size.
+
+ - Added a new option --memlimit-mt-decompress=LIMIT. This is
+ used to limit the number of decompressor threads (possibly
+ falling back to single-threaded mode) but it will never make
+ xz refuse to decompress a file. This has a system-specific
+ default value because without any limit xz could end up
+ allocating memory for the whole compressed input file, the
+ whole uncompressed output file, multiple thread-specific
+ decompressor instances and so on. Basically xz could
+ attempt to use an insane amount of memory even with fairly
+ common files.
+
+ The new option works together with the existing option
+ --memlimit-decompress=LIMIT. The old option sets a hard limit
+ that must not be exceeded (xz will refuse to decompress)
+ while the new option only restricts the number of threads.
+ If the limit set with --memlimit-mt-decompress is greater
+ than the limit set with --memlimit-decompress, then the latter
+ value is used also for --memlimit-mt-decompress.
+
+ * Tests:
+
+ - Added a few more tests.
+
+ - Added tests/code_coverage.sh to create a code coverage report
+ of the tests.
+
+ * Build systems:
+
+ - Automake's parallel test harness is now used to make tests
+ finish faster.
+
+ - Added the CMake files to the distribution tarball. These were
+ supposed to be in 5.2.5 already.
+
+ - Added liblzma tests to the CMake build.
+
+ - Windows: Fix building of liblzma.dll with the included
+ Visual Studio project files.
+
+
+5.3.2alpha (2021-10-28)
+
+ This release was made on short notice so that recent erofs-utils can
+ be built with LZMA support without needing a snapshot from xz.git.
+ Thus many pending things were not included, not even updated
+ translations (which would need to be updated for the new --list
+ strings anyway).
+
+ * All fixes from 5.2.5.
+
+ * xz:
+
+ - When copying metadata from the source file to the destination
+ file, don't try to set the group (GID) if it is already set
+ correctly. This avoids a failure on OpenBSD (and possibly on
+ a few other OSes) where files may get created so that their
+ group doesn't belong to the user, and fchown(2) can fail even
+ if it needs to do nothing.
+
+ - The --keep option now accepts symlinks, hardlinks, and
+ setuid, setgid, and sticky files. Previously this required
+ using --force.
+
+ - Split the long strings used in --list and --info-memory modes
+ to make them much easier for translators.
+
+ - If built with sandbox support and enabling the sandbox fails,
+ xz will now immediately exit with exit status of 1. Previously
+ it would only display a warning if -vv was used.
+
+ - Cap --memlimit-compress to 2000 MiB on MIPS32 because on
+ MIPS32 userspace processes are limited to 2 GiB of address
+ space.
+
+ * liblzma:
+
+ - Added lzma_microlzma_encoder() and lzma_microlzma_decoder().
+ The API is in lzma/container.h.
+
+ The MicroLZMA format is a raw LZMA stream (without end marker)
+ whose first byte (always 0x00) has been replaced with
+ bitwise-negation of the LZMA properties (lc/lp/pb). It was
+ created for use in EROFS but may be used in other contexts
+ as well where it is important to avoid wasting bytes for
+ stream headers or footers. The format is also supported by
+ XZ Embedded.
+
+ The MicroLZMA encoder API in liblzma can compress into a
+ fixed-sized output buffer so that as much data is compressed
+ as can be fit into the buffer while still creating a valid
+ MicroLZMA stream. This is needed for EROFS.
+
+ - Added fuzzing support.
+
+ - Support Intel Control-flow Enforcement Technology (CET) in
+ 32-bit x86 assembly files.
+
+ - Visual Studio: Use non-standard _MSVC_LANG to detect C++
+ standard version in the lzma.h API header. It's used to
+ detect when "noexcept" can be used.
+
+ * Scripts:
+
+ - Fix exit status of xzdiff/xzcmp. Exit status could be 2 when
+ the correct value is 1.
+
+ - Fix exit status of xzgrep.
+
+ - Detect corrupt .bz2 files in xzgrep.
+
+ - Add zstd support to xzgrep and xzdiff/xzcmp.
+
+ - Fix less(1) version detection in xzless. It failed if the
+ version number from "less -V" contained a dot.
+
+ * Fix typos and technical issues in man pages.
+
+ * Build systems:
+
+ - Windows: Fix building of resource files when config.h isn't
+ used. CMake + Visual Studio can now build liblzma.dll.
+
+ - Various fixes to the CMake support. It might still need a few
+ more fixes even for liblzma-only builds.
+
+
+5.3.1alpha (2018-04-29)
+
+ * All fixes from 5.2.4.
+
+ * Add lzma_file_info_decoder() into liblzma and use it in xz to
+ implement the --list feature.
+
+ * Capsicum sandbox support is enabled by default where available
+ (FreeBSD >= 10).
+
+
+5.2.10 (2022-12-13)
+
+ * xz: Don't modify argv[] when parsing the --memlimit* and
+ --block-list command line options. This fixes confusing
+ arguments in process listing (like "ps auxf").
+
+ * GNU/Linux only: Use __has_attribute(__symver__) to detect if
+ that attribute is supported. This fixes build on Mandriva where
+ Clang is patched to define __GNUC__ to 11 by default (instead
+ of 4 as used by Clang upstream).
+
+
+5.2.9 (2022-11-30)
+
+ * liblzma:
+
+ - Fixed an infinite loop in LZMA encoder initialization
+ if dict_size >= 2 GiB. (The encoder only supports up
+ to 1536 MiB.)
+
+ - Fixed two cases of invalid free() that can happen if
+ a tiny allocation fails in encoder re-initialization
+ or in lzma_filters_update(). These bugs had some
+ similarities with the bug fixed in 5.2.7.
+
+ - Fixed lzma_block_encoder() not allowing the use of
+ LZMA_SYNC_FLUSH with lzma_code() even though it was
+ documented to be supported. The sync-flush code in
+ the Block encoder was already used internally via
+ lzma_stream_encoder(), so this was just a missing flag
+ in the lzma_block_encoder() API function.
+
+ - GNU/Linux only: Don't put symbol versions into static
+ liblzma as it breaks things in some cases (and even if
+ it didn't break anything, symbol versions in static
+ libraries are useless anyway). The downside of the fix
+ is that if the configure options --with-pic or --without-pic
+ are used then it's not possible to build both shared and
+ static liblzma at the same time on GNU/Linux anymore;
+ with those options --disable-static or --disable-shared
+ must be used too.
+
+ * New email address for bug reports is <xz@tukaani.org> which
+ forwards messages to Lasse Collin and Jia Tan.
+
+
+5.2.8 (2022-11-13)
+
+ * xz:
+
+ - If xz cannot remove an input file when it should, this
+ is now treated as a warning (exit status 2) instead of
+ an error (exit status 1). This matches GNU gzip and it
+ is more logical as at that point the output file has
+ already been successfully closed.
+
+ - Fix handling of .xz files with an unsupported check type.
+ Previously such printed a warning message but then xz
+ behaved as if an error had occurred (didn't decompress,
+ exit status 1). Now a warning is printed, decompression
+ is done anyway, and exit status is 2. This used to work
+ slightly before 5.0.0. In practice this bug matters only
+ if xz has been built with some check types disabled. As
+ instructed in PACKAGERS, such builds should be done in
+ special situations only.
+
+ - Fix "xz -dc --single-stream tests/files/good-0-empty.xz"
+ which failed with "Internal error (bug)". That is,
+ --single-stream was broken if the first .xz stream in
+ the input file didn't contain any uncompressed data.
+
+ - Fix displaying file sizes in the progress indicator when
+ working in passthru mode and there are multiple input files.
+ Just like "gzip -cdf", "xz -cdf" works like "cat" when the
+ input file isn't a supported compressed file format. In
+ this case the file size counters weren't reset between
+ files so with multiple input files the progress indicator
+ displayed an incorrect (too large) value.
+
+ * liblzma:
+
+ - API docs in lzma/container.h:
+ * Update the list of decoder flags in the decoder
+ function docs.
+ * Explain LZMA_CONCATENATED behavior with .lzma files
+ in lzma_auto_decoder() docs.
+
+ - OpenBSD: Use HW_NCPUONLINE to detect the number of
+ available hardware threads in lzma_cputhreads().
+
+ - Fix use of wrong macro to detect x86 SSE2 support.
+ __SSE2_MATH__ was used with GCC/Clang but the correct
+ one is __SSE2__. The first one means that SSE2 is used
+ for floating point math which is irrelevant here.
+ The affected SSE2 code isn't used on x86-64 so this affects
+ only 32-bit x86 builds that use -msse2 without -mfpmath=sse
+ (there is no runtime detection for SSE2). It improves LZMA
+ compression speed (not decompression).
+
+ - Fix the build with Intel C compiler 2021 (ICC, not ICX)
+ on Linux. It defines __GNUC__ to 10 but doesn't support
+ the __symver__ attribute introduced in GCC 10.
+
+ * Scripts: Ignore warnings from xz by using --quiet --no-warn.
+ This is needed if the input .xz files use an unsupported
+ check type.
+
+ * Translations:
+
+ - Updated Croatian and Turkish translations.
+
+ - One new translation wasn't included because it needed
+ technical fixes. It will be in upcoming 5.4.0. No new
+ translations will be added to the 5.2.x branch anymore.
+
+ - Renamed the French man page translation file from
+ fr_FR.po to fr.po and thus also its install directory
+ (like /usr/share/man/fr_FR -> .../fr).
+
+ - Man page translations for upcoming 5.4.0 are now handled
+ in the Translation Project.
+
+ * Update doc/faq.txt a little so it's less out-of-date.
+
+
+5.2.7 (2022-09-30)
+
+ * liblzma:
+
+ - Made lzma_filters_copy() to never modify the destination
+ array if an error occurs. lzma_stream_encoder() and
+ lzma_stream_encoder_mt() already assumed this. Before this
+ change, if a tiny memory allocation in lzma_filters_copy()
+ failed it would lead to a crash (invalid free() or invalid
+ memory reads) in the cleanup paths of these two encoder
+ initialization functions.
+
+ - Added missing integer overflow check to lzma_index_append().
+ This affects xz --list and other applications that decode
+ the Index field from .xz files using lzma_index_decoder().
+ Normal decompression of .xz files doesn't call this code
+ and thus most applications using liblzma aren't affected
+ by this bug.
+
+ - Single-threaded .xz decoder (lzma_stream_decoder()): If
+ lzma_code() returns LZMA_MEMLIMIT_ERROR it is now possible
+ to use lzma_memlimit_set() to increase the limit and continue
+ decoding. This was supposed to work from the beginning
+ but there was a bug. With other decoders (.lzma or
+ threaded .xz decoder) this already worked correctly.
+
+ - Fixed accumulation of integrity check type statistics in
+ lzma_index_cat(). This bug made lzma_index_checks() return
+ only the type of the integrity check of the last Stream
+ when multiple lzma_indexes were concatenated. Most
+ applications don't use these APIs but in xz it made
+ xz --list not list all check types from concatenated .xz
+ files. In xz --list --verbose only the per-file "Check:"
+ lines were affected and in xz --robot --list only the "file"
+ line was affected.
+
+ - Added ABI compatibility with executables that were linked
+ against liblzma in RHEL/CentOS 7 or other liblzma builds
+ that had copied the problematic patch from RHEL/CentOS 7
+ (xz-5.2.2-compat-libs.patch). For the details, see the
+ comment at the top of src/liblzma/validate_map.sh.
+
+ WARNING: This uses __symver__ attribute with GCC >= 10.
+ In other cases the traditional __asm__(".symver ...")
+ is used. Using link-time optimization (LTO, -flto) with
+ GCC versions older than 10 can silently result in
+ broken liblzma.so.5 (incorrect symbol versions)! If you
+ want to use -flto with GCC, you must use GCC >= 10.
+ LTO with Clang seems to work even with the traditional
+ __asm__(".symver ...") method.
+
+ * xzgrep: Fixed compatibility with old shells that break if
+ comments inside command substitutions have apostrophes (').
+ This problem was introduced in 5.2.6.
+
+ * Build systems:
+
+ - New #define in config.h: HAVE_SYMBOL_VERSIONS_LINUX
+
+ - Windows: Fixed liblzma.dll build with Visual Studio project
+ files. It broke in 5.2.6 due to a change that was made to
+ improve CMake support.
+
+ - Windows: Building liblzma with UNICODE defined should now
+ work.
+
+ - CMake files are now actually included in the release tarball.
+ They should have been in 5.2.5 already.
+
+ - Minor CMake fixes and improvements.
+
+ * Added a new translation: Turkish
+
+
+5.2.6 (2022-08-12)
+
+ * xz:
+
+ - The --keep option now accepts symlinks, hardlinks, and
+ setuid, setgid, and sticky files. Previously this required
+ using --force.
+
+ - When copying metadata from the source file to the destination
+ file, don't try to set the group (GID) if it is already set
+ correctly. This avoids a failure on OpenBSD (and possibly on
+ a few other OSes) where files may get created so that their
+ group doesn't belong to the user, and fchown(2) can fail even
+ if it needs to do nothing.
+
+ - Cap --memlimit-compress to 2000 MiB instead of 4020 MiB on
+ MIPS32 because on MIPS32 userspace processes are limited
+ to 2 GiB of address space.
+
+ * liblzma:
+
+ - Fixed a missing error-check in the threaded encoder. If a
+ small memory allocation fails, a .xz file with an invalid
+ Index field would be created. Decompressing such a file would
+ produce the correct output but result in an error at the end.
+ Thus this is a "mild" data corruption bug. Note that while
+ a failed memory allocation can trigger the bug, it cannot
+ cause invalid memory access.
+
+ - The decoder for .lzma files now supports files that have
+ uncompressed size stored in the header and still use the
+ end of payload marker (end of stream marker) at the end
+ of the LZMA stream. Such files are rare but, according to
+ the documentation in LZMA SDK, they are valid.
+ doc/lzma-file-format.txt was updated too.
+
+ - Improved 32-bit x86 assembly files:
+ * Support Intel Control-flow Enforcement Technology (CET)
+ * Use non-executable stack on FreeBSD.
+
+ - Visual Studio: Use non-standard _MSVC_LANG to detect C++
+ standard version in the lzma.h API header. It's used to
+ detect when "noexcept" can be used.
+
+ * xzgrep:
+
+ - Fixed arbitrary command injection via a malicious filename
+ (CVE-2022-1271, ZDI-CAN-16587). A standalone patch for
+ this was released to the public on 2022-04-07. A slight
+ robustness improvement has been made since then and, if
+ using GNU or *BSD grep, a new faster method is now used
+ that doesn't use the old sed-based construct at all. This
+ also fixes bad output with GNU grep >= 3.5 (2020-09-27)
+ when xzgrepping binary files.
+
+ This vulnerability was discovered by:
+ cleemy desu wayo working with Trend Micro Zero Day Initiative
+
+ - Fixed detection of corrupt .bz2 files.
+
+ - Improved error handling to fix exit status in some situations
+ and to fix handling of signals: in some situations a signal
+ didn't make xzgrep exit when it clearly should have. It's
+ possible that the signal handling still isn't quite perfect
+ but hopefully it's good enough.
+
+ - Documented exit statuses on the man page.
+
+ - xzegrep and xzfgrep now use "grep -E" and "grep -F" instead
+ of the deprecated egrep and fgrep commands.
+
+ - Fixed parsing of the options -E, -F, -G, -P, and -X. The
+ problem occurred when multiple options were specified in
+ a single argument, for example,
+
+ echo foo | xzgrep -Fe foo
+
+ treated foo as a filename because -Fe wasn't correctly
+ split into -F -e.
+
+ - Added zstd support.
+
+ * xzdiff/xzcmp:
+
+ - Fixed wrong exit status. Exit status could be 2 when the
+ correct value is 1.
+
+ - Documented on the man page that exit status of 2 is used
+ for decompression errors.
+
+ - Added zstd support.
+
+ * xzless:
+
+ - Fix less(1) version detection. It failed if the version number
+ from "less -V" contained a dot.
+
+ * Translations:
+
+ - Added new translations: Catalan, Croatian, Esperanto,
+ Korean, Portuguese, Romanian, Serbian, Spanish, Swedish,
+ and Ukrainian
+
+ - Updated the Brazilian Portuguese translation.
+
+ - Added French man page translation. This and the existing
+ German translation aren't complete anymore because the
+ English man pages got a few updates and the translators
+ weren't reached so that they could update their work.
+
+ * Build systems:
+
+ - Windows: Fix building of resource files when config.h isn't
+ used. CMake + Visual Studio can now build liblzma.dll.
+
+ - Various fixes to the CMake support. Building static or shared
+ liblzma should work fine in most cases. In contrast, building
+ the command line tools with CMake is still clearly incomplete
+ and experimental and should be used for testing only.
+
+
5.2.5 (2020-03-17)
* liblzma:
diff --git a/contrib/libs/lzma/README b/contrib/libs/lzma/README
index 3f0c38dca6..b9081ed455 100644
--- a/contrib/libs/lzma/README
+++ b/contrib/libs/lzma/README
@@ -202,9 +202,77 @@ XZ Utils
https://translationproject.org/html/translators.html
- Several strings will change in a future version of xz so if you
- wish to start a new translation, look at the code in the xz git
- repostiory instead of a 5.2.x release.
+ Below are notes and testing instructions specific to xz
+ translations.
+
+ Testing can be done by installing xz into a temporary directory:
+
+ ./configure --disable-shared --prefix=/tmp/xz-test
+ # <Edit the .po file in the po directory.>
+ make -C po update-po
+ make install
+ bash debug/translation.bash | less
+ bash debug/translation.bash | less -S # For --list outputs
+
+ Repeat the above as needed (no need to re-run configure though).
+
+ Note especially the following:
+
+ - The output of --help and --long-help must look nice on
+ an 80-column terminal. It's OK to add extra lines if needed.
+
+ - In contrast, don't add extra lines to error messages and such.
+ They are often preceded with e.g. a filename on the same line,
+ so you have no way to predict where to put a \n. Let the terminal
+ do the wrapping even if it looks ugly. Adding new lines will be
+ even uglier in the generic case even if it looks nice in a few
+ limited examples.
+
+ - Be careful with column alignment in tables and table-like output
+ (--list, --list --verbose --verbose, --info-memory, --help, and
+ --long-help):
+
+ * All descriptions of options in --help should start in the
+ same column (but it doesn't need to be the same column as
+ in the English messages; just be consistent if you change it).
+ Check that both --help and --long-help look OK, since they
+ share several strings.
+
+ * --list --verbose and --info-memory print lines that have
+ the format "Description: %s". If you need a longer
+ description, you can put extra space between the colon
+ and %s. Then you may need to add extra space to other
+ strings too so that the result as a whole looks good (all
+ values start at the same column).
+
+ * The columns of the actual tables in --list --verbose --verbose
+ should be aligned properly. Abbreviate if necessary. It might
+ be good to keep at least 2 or 3 spaces between column headings
+ and avoid spaces in the headings so that the columns stand out
+ better, but this is a matter of opinion. Do what you think
+ looks best.
+
+ - Be careful to put a period at the end of a sentence when the
+ original version has it, and don't put it when the original
+ doesn't have it. Similarly, be careful with \n characters
+ at the beginning and end of the strings.
+
+ - Read the TRANSLATORS comments that have been extracted from the
+ source code and included in xz.pot. Some comments suggest
+ testing with a specific command which needs an .xz file. You
+ may use e.g. any tests/files/good-*.xz. However, these test
+ commands are included in translation.bash output, so reading
+ translation.bash output carefully can be enough.
+
+ - If you find language problems in the original English strings,
+ feel free to suggest improvements. Ask if something is unclear.
+
+ - The translated messages should be understandable (sometimes this
+ may be a problem with the original English messages too). Don't
+ make a direct word-by-word translation from English especially if
+ the result doesn't sound good in your language.
+
+ Thanks for your help!
5. Other implementations of the .xz format
@@ -226,11 +294,10 @@ XZ Utils
----------------------
If you have questions, bug reports, patches etc. related to XZ Utils,
- contact Lasse Collin <lasse.collin@tukaani.org> (in Finnish or English).
- I'm sometimes slow at replying. If you haven't got a reply within two
- weeks, assume that your email has got lost and resend it or use IRC.
+ the project maintainers Lasse Collin and Jia Tan can be reached via
+ <xz@tukaani.org>.
- You can find me also from #tukaani on Freenode; my nick is Larhzu.
- The channel tends to be pretty quiet, so just ask your question and
- someone may wake up.
+ You might find Lasse also from #tukaani on Libera Chat (IRC).
+ The nick is Larhzu. The channel tends to be pretty quiet,
+ so just ask your question and someone might wake up.
diff --git a/contrib/libs/lzma/common/mythread.h b/contrib/libs/lzma/common/mythread.h
index be22654240..4138218360 100644
--- a/contrib/libs/lzma/common/mythread.h
+++ b/contrib/libs/lzma/common/mythread.h
@@ -370,10 +370,11 @@ typedef struct {
BOOL pending_; \
if (!InitOnceBeginInitialize(&once_, 0, &pending_, NULL)) \
abort(); \
- if (pending_) \
+ if (pending_) { \
func(); \
- if (!InitOnceComplete(&once, 0, NULL)) \
- abort(); \
+ if (!InitOnceComplete(&once, 0, NULL)) \
+ abort(); \
+ } \
} while (0)
#endif
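
For illustration, a minimal sketch of how mythread_once() from this header is
meant to be used; init_tables() is a made-up initializer, not part of liblzma:

    #include "mythread.h"

    static void
    init_tables(void)
    {
        // One-time setup; mythread_once() guarantees this runs exactly once
        // even when use_tables() is called from several threads concurrently.
    }

    static void
    use_tables(void)
    {
        mythread_once(init_tables);
        // ... the initialized data can be used safely here ...
    }
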
diff --git a/contrib/libs/lzma/common/tuklib_common.h b/contrib/libs/lzma/common/tuklib_common.h
index 31fbab58b0..b1f531ea4a 100644
--- a/contrib/libs/lzma/common/tuklib_common.h
+++ b/contrib/libs/lzma/common/tuklib_common.h
@@ -14,7 +14,7 @@
#define TUKLIB_COMMON_H
// The config file may be replaced by a package-specific file.
-// It should include at least stddef.h, inttypes.h, and limits.h.
+// It should include at least stddef.h, stdbool.h, inttypes.h, and limits.h.
#include "tuklib_config.h"
// TUKLIB_SYMBOL_PREFIX is prefixed to all symbols exported by
diff --git a/contrib/libs/lzma/common/tuklib_config.h b/contrib/libs/lzma/common/tuklib_config.h
index 549cb24d77..9d470ba732 100644
--- a/contrib/libs/lzma/common/tuklib_config.h
+++ b/contrib/libs/lzma/common/tuklib_config.h
@@ -1,7 +1,10 @@
+// If config.h isn't available, assume that the headers required by
+// tuklib_common.h are available. This is required by crc32_tablegen.c.
#ifdef HAVE_CONFIG_H
# include "sysdefs.h"
#else
# include <stddef.h>
+# include <stdbool.h>
# include <inttypes.h>
# include <limits.h>
#endif
diff --git a/contrib/libs/lzma/common/tuklib_cpucores.c b/contrib/libs/lzma/common/tuklib_cpucores.c
index cc968dd25e..bb3f2f752b 100644
--- a/contrib/libs/lzma/common/tuklib_cpucores.c
+++ b/contrib/libs/lzma/common/tuklib_cpucores.c
@@ -72,7 +72,16 @@ tuklib_cpucores(void)
}
#elif defined(TUKLIB_CPUCORES_SYSCTL)
+ // On OpenBSD HW_NCPUONLINE tells the number of processor cores that
+ // are online so it is preferred over HW_NCPU which also counts cores
+ // that aren't currently available. The number of cores online is
+ // often less than HW_NCPU because OpenBSD disables simultaneous
+ // multi-threading (SMT) by default.
+# ifdef HW_NCPUONLINE
+ int name[2] = { CTL_HW, HW_NCPUONLINE };
+# else
int name[2] = { CTL_HW, HW_NCPU };
+# endif
int cpus;
size_t cpus_size = sizeof(cpus);
if (sysctl(name, 2, &cpus, &cpus_size, NULL, 0) != -1
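
For reference, a standalone sketch of the same sysctl() query outside of
tuklib; HW_NCPUONLINE is only defined on OpenBSD, so the fallback mirrors the
hunk above:

    #include <sys/types.h>
    #include <sys/sysctl.h>

    static int
    cores_online(void)
    {
    #ifdef HW_NCPUONLINE
        int name[2] = { CTL_HW, HW_NCPUONLINE }; // cores currently usable
    #else
        int name[2] = { CTL_HW, HW_NCPU };       // includes disabled SMT siblings
    #endif
        int cpus = 0;
        size_t cpus_size = sizeof(cpus);
        if (sysctl(name, 2, &cpus, &cpus_size, NULL, 0) == -1
                || cpus_size != sizeof(cpus) || cpus <= 0)
            return 0; // unknown
        return cpus;
    }
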
diff --git a/contrib/libs/lzma/common/tuklib_integer.h b/contrib/libs/lzma/common/tuklib_integer.h
index 6f44a7a0ac..b58ef68d09 100644
--- a/contrib/libs/lzma/common/tuklib_integer.h
+++ b/contrib/libs/lzma/common/tuklib_integer.h
@@ -17,8 +17,8 @@
/// - Byte swapping: bswapXX(num)
/// - Byte order conversions to/from native (byteswaps if Y isn't
/// the native endianness): convXXYe(num)
-/// - Unaligned reads (16/32-bit only): readXXYe(ptr)
-/// - Unaligned writes (16/32-bit only): writeXXYe(ptr, num)
+/// - Unaligned reads: readXXYe(ptr)
+/// - Unaligned writes: writeXXYe(ptr, num)
/// - Aligned reads: aligned_readXXYe(ptr)
/// - Aligned writes: aligned_writeXXYe(ptr, num)
///
@@ -343,6 +343,46 @@ read32le(const uint8_t *buf)
}
+static inline uint64_t
+read64be(const uint8_t *buf)
+{
+#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
+ uint64_t num = read64ne(buf);
+ return conv64be(num);
+#else
+ uint64_t num = (uint64_t)buf[0] << 56;
+ num |= (uint64_t)buf[1] << 48;
+ num |= (uint64_t)buf[2] << 40;
+ num |= (uint64_t)buf[3] << 32;
+ num |= (uint64_t)buf[4] << 24;
+ num |= (uint64_t)buf[5] << 16;
+ num |= (uint64_t)buf[6] << 8;
+ num |= (uint64_t)buf[7];
+ return num;
+#endif
+}
+
+
+static inline uint64_t
+read64le(const uint8_t *buf)
+{
+#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
+ uint64_t num = read64ne(buf);
+ return conv64le(num);
+#else
+ uint64_t num = (uint64_t)buf[0];
+ num |= (uint64_t)buf[1] << 8;
+ num |= (uint64_t)buf[2] << 16;
+ num |= (uint64_t)buf[3] << 24;
+ num |= (uint64_t)buf[4] << 32;
+ num |= (uint64_t)buf[5] << 40;
+ num |= (uint64_t)buf[6] << 48;
+ num |= (uint64_t)buf[7] << 56;
+ return num;
+#endif
+}
+
+
// NOTE: Possible byte swapping must be done in a macro to allow the compiler
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
@@ -350,11 +390,13 @@ read32le(const uint8_t *buf)
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
# define write16be(buf, num) write16ne(buf, conv16be(num))
# define write32be(buf, num) write32ne(buf, conv32be(num))
+# define write64be(buf, num) write64ne(buf, conv64be(num))
#endif
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
# define write16le(buf, num) write16ne(buf, conv16le(num))
# define write32le(buf, num) write32ne(buf, conv32le(num))
+# define write64le(buf, num) write64ne(buf, conv64le(num))
#endif
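
A small sketch showing the new 64-bit helpers in use (assumes tuklib_integer.h
is on the include path); the constant is arbitrary:

    #include "tuklib_integer.h"

    static uint64_t
    roundtrip64(void)
    {
        uint8_t buf[8];

        // Store little endian, read it back as a native integer.
        write64le(buf, UINT64_C(0x0123456789ABCDEF));
        uint64_t v = read64le(buf);

        // Same value through the big-endian helpers.
        write64be(buf, v);
        return read64be(buf); // 0x0123456789ABCDEF again
    }
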
diff --git a/contrib/libs/lzma/common/tuklib_physmem.c b/contrib/libs/lzma/common/tuklib_physmem.c
index c499791474..fb11478f24 100644
--- a/contrib/libs/lzma/common/tuklib_physmem.c
+++ b/contrib/libs/lzma/common/tuklib_physmem.c
@@ -84,7 +84,7 @@ tuklib_physmem(void)
// supports reporting values greater than 4 GiB. To keep the
// code working also on older Windows versions, use
// GlobalMemoryStatusEx() conditionally.
- HMODULE kernel32 = GetModuleHandle("kernel32.dll");
+ HMODULE kernel32 = GetModuleHandle(TEXT("kernel32.dll"));
if (kernel32 != NULL) {
typedef BOOL (WINAPI *gmse_type)(LPMEMORYSTATUSEX);
gmse_type gmse = (gmse_type)GetProcAddress(
diff --git a/contrib/libs/lzma/liblzma/api/lzma.h b/contrib/libs/lzma/liblzma/api/lzma.h
index 122dab80d3..8fbd9a874b 100644
--- a/contrib/libs/lzma/liblzma/api/lzma.h
+++ b/contrib/libs/lzma/liblzma/api/lzma.h
@@ -219,7 +219,8 @@
*/
#ifndef lzma_nothrow
# if defined(__cplusplus)
-# if __cplusplus >= 201103L
+# if __cplusplus >= 201103L || (defined(_MSVC_LANG) \
+ && _MSVC_LANG >= 201103L)
# define lzma_nothrow noexcept
# else
# define lzma_nothrow throw()
diff --git a/contrib/libs/lzma/liblzma/api/lzma/base.h b/contrib/libs/lzma/liblzma/api/lzma/base.h
index a6005accc9..ff2d86968e 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/base.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/base.h
@@ -145,6 +145,12 @@ typedef enum {
* specified memory usage limit. To continue decoding,
* the memory usage limit has to be increased with
* lzma_memlimit_set().
+ *
+ * liblzma 5.2.6 and earlier had a bug in single-threaded .xz
+ * decoder (lzma_stream_decoder()) which made it impossible
+ * to continue decoding after LZMA_MEMLIMIT_ERROR even if
+ * the limit was increased using lzma_memlimit_set().
+ * Other decoders worked correctly.
*/
LZMA_FORMAT_ERROR = 7,
@@ -234,6 +240,36 @@ typedef enum {
* can be a sign of a bug in liblzma. See the documentation
* how to report bugs.
*/
+
+ LZMA_SEEK_NEEDED = 12,
+ /**<
+ * \brief Request to change the input file position
+ *
+ * Some coders can do random access in the input file. The
+ * initialization functions of these coders take the file size
+ * as an argument. No other coders can return LZMA_SEEK_NEEDED.
+ *
+ * When this value is returned, the application must seek to
+ * the file position given in lzma_stream.seek_pos. This value
+ * is guaranteed to never exceed the file size that was
+ * specified at the coder initialization.
+ *
+ * After seeking the application should read new input and
+ * pass it normally via lzma_stream.next_in and .avail_in.
+ */
+
+ /*
+ * These enumerations may be used internally by liblzma
+ * but they will never be returned to applications.
+ */
+ LZMA_RET_INTERNAL1 = 101,
+ LZMA_RET_INTERNAL2 = 102,
+ LZMA_RET_INTERNAL3 = 103,
+ LZMA_RET_INTERNAL4 = 104,
+ LZMA_RET_INTERNAL5 = 105,
+ LZMA_RET_INTERNAL6 = 106,
+ LZMA_RET_INTERNAL7 = 107,
+ LZMA_RET_INTERNAL8 = 108
} lzma_ret;
@@ -447,7 +483,7 @@ typedef struct lzma_internal_s lzma_internal;
*
* The lzma_stream structure is used for
* - passing pointers to input and output buffers to liblzma;
- * - defining custom memory hander functions; and
+ * - defining custom memory handler functions; and
* - holding a pointer to coder-specific internal data structures.
*
* Typical usage:
@@ -514,7 +550,19 @@ typedef struct {
void *reserved_ptr2;
void *reserved_ptr3;
void *reserved_ptr4;
- uint64_t reserved_int1;
+
+ /**
+ * \brief New seek input position for LZMA_SEEK_NEEDED
+ *
+ * When lzma_code() returns LZMA_SEEK_NEEDED, the new input position
+ * needed by liblzma will be available in seek_pos. The value is
+ * guaranteed to not exceed the file size that was specified when
+ * this lzma_stream was initialized.
+ *
+ * In all other situations the value of this variable is undefined.
+ */
+ uint64_t seek_pos;
+
uint64_t reserved_int2;
size_t reserved_int3;
size_t reserved_int4;
@@ -649,6 +697,11 @@ extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm)
* return LZMA_OK. Later versions treat 0 as if 1 had been specified (so
* lzma_memlimit_get() will return 1 even if you specify 0 here).
*
+ * liblzma 5.2.6 and earlier had a bug in single-threaded .xz decoder
+ * (lzma_stream_decoder()) which made it impossible to continue decoding
+ * after LZMA_MEMLIMIT_ERROR even if the limit was increased using
+ * lzma_memlimit_set(). Other decoders worked correctly.
+ *
* \return - LZMA_OK: New memory usage limit successfully set.
* - LZMA_MEMLIMIT_ERROR: The new limit is too small.
* The limit was not changed.
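
A hedged sketch of reacting to LZMA_MEMLIMIT_ERROR by raising the limit and
calling lzma_code() again; the doubling policy is only an example, and per the
note above this retry is reliable with the single-threaded .xz decoder only in
liblzma 5.2.7 and newer:

    #include <lzma.h>

    static lzma_ret
    code_with_memlimit_retry(lzma_stream *strm, lzma_action action)
    {
        lzma_ret ret = lzma_code(strm, action);

        while (ret == LZMA_MEMLIMIT_ERROR) {
            // Example policy: double the current limit and retry.
            const uint64_t limit = lzma_memlimit_get(strm);
            if (lzma_memlimit_set(strm, limit * 2) != LZMA_OK)
                break;
            ret = lzma_code(strm, action);
        }

        return ret;
    }
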
diff --git a/contrib/libs/lzma/liblzma/api/lzma/bcj.h b/contrib/libs/lzma/liblzma/api/lzma/bcj.h
index 8e37538ad4..a1738d93b7 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/bcj.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/bcj.h
@@ -49,9 +49,13 @@
* Filter for SPARC binaries.
*/
+#define LZMA_FILTER_ARM64 LZMA_VLI_C(0x0A)
+ /**<
+ * Filter for ARM64 binaries.
+ */
/**
- * \brief Options for BCJ filters
+ * \brief Options for BCJ filters (except ARM64)
*
* The BCJ filters never change the size of the data. Specifying options
* for them is optional: if pointer to options is NULL, default value is
diff --git a/contrib/libs/lzma/liblzma/api/lzma/block.h b/contrib/libs/lzma/liblzma/api/lzma/block.h
index 962f38779c..082e55833e 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/block.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/block.h
@@ -464,9 +464,6 @@ extern LZMA_API(lzma_ret) lzma_block_encoder(
* LZMA_FINISH is not required. It is supported only for convenience.
*
* \return - LZMA_OK: All good, continue with lzma_code().
- * - LZMA_UNSUPPORTED_CHECK: Initialization was successful, but
- * the given Check ID is not supported, thus Check will be
- * ignored.
* - LZMA_PROG_ERROR
* - LZMA_MEM_ERROR
*/
diff --git a/contrib/libs/lzma/liblzma/api/lzma/container.h b/contrib/libs/lzma/liblzma/api/lzma/container.h
index 9fbf4df061..6e432a1f3b 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/container.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/container.h
@@ -69,7 +69,12 @@ typedef struct {
*
* Set this to zero if no flags are wanted.
*
- * No flags are currently supported.
+ * Encoder: No flags are currently supported.
+ *
+ * Decoder: Bitwise-or of zero or more of the decoder flags:
+ * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
+ * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK,
+ * LZMA_CONCATENATED, LZMA_FAIL_FAST
*/
uint32_t flags;
@@ -79,7 +84,7 @@ typedef struct {
uint32_t threads;
/**
- * \brief Maximum uncompressed size of a Block
+ * \brief Encoder only: Maximum uncompressed size of a Block
*
* The encoder will start a new .xz Block every block_size bytes.
* Using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER with lzma_code()
@@ -135,7 +140,7 @@ typedef struct {
uint32_t timeout;
/**
- * \brief Compression preset (level and possible flags)
+ * \brief Encoder only: Compression preset
*
* The preset is set just like with lzma_easy_encoder().
* The preset is ignored if filters below is non-NULL.
@@ -143,7 +148,7 @@ typedef struct {
uint32_t preset;
/**
- * \brief Filter chain (alternative to a preset)
+ * \brief Encoder only: Filter chain (alternative to a preset)
*
* If this is NULL, the preset above is used. Otherwise the preset
* is ignored and the filter chain specified here is used.
@@ -151,7 +156,7 @@ typedef struct {
const lzma_filter *filters;
/**
- * \brief Integrity check type
+ * \brief Encoder only: Integrity check type
*
* See check.h for available checks. The xz command line tool
* defaults to LZMA_CHECK_CRC64, which is a good choice if you
@@ -173,8 +178,50 @@ typedef struct {
uint32_t reserved_int2;
uint32_t reserved_int3;
uint32_t reserved_int4;
- uint64_t reserved_int5;
- uint64_t reserved_int6;
+
+ /**
+ * \brief Memory usage limit to reduce the number of threads
+ *
+ * Encoder: Ignored.
+ *
+ * Decoder:
+ *
+ * If the number of threads has been set so high that more than
+ * memlimit_threading bytes of memory would be needed, the number
+ * of threads will be reduced so that the memory usage will not exceed
+ * memlimit_threading bytes. However, if memlimit_threading cannot
+ * be met even in single-threaded mode, then decoding will continue
+ * in single-threaded mode and memlimit_threading may be exceeded
+ * even by a large amount. That is, memlimit_threading will never make
+ * lzma_code() return LZMA_MEMLIMIT_ERROR. To truly cap the memory
+ * usage, see memlimit_stop below.
+ *
+ * Setting memlimit_threading to UINT64_MAX or a similar huge value
+ * means that liblzma is allowed to keep the whole compressed file
+ * and the whole uncompressed file in memory in addition to the memory
+ * needed by the decompressor data structures used by each thread!
+ * In other words, a reasonable value limit must be set here or it
+ * will cause problems sooner or later. If you have no idea what
+ * a reasonable value could be, try lzma_physmem() / 4 as a starting
+ * point. Setting this limit will never prevent decompression of
+ * a file; this will only reduce the number of threads.
+ *
+ * If memlimit_threading is greater than memlimit_stop, then the value
+ * of memlimit_stop will be used for both.
+ */
+ uint64_t memlimit_threading;
+
+ /**
+ * \brief Memory usage limit that should never be exceeded
+ *
+ * Encoder: Ignored.
+ *
+ * Decoder: If decompressing will need more than this amount of
+ * memory even in the single-threaded mode, then lzma_code() will
+ * return LZMA_MEMLIMIT_ERROR.
+ */
+ uint64_t memlimit_stop;
+
uint64_t reserved_int7;
uint64_t reserved_int8;
void *reserved_ptr1;
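
A sketch of how the new decoder-side fields might be filled for the threaded
decoder (lzma_stream_decoder_mt() is declared further down in this header);
lzma_physmem() / 4 follows the hint above and the other values are just
examples:

    #include <lzma.h>

    static lzma_ret
    init_threaded_decoder(lzma_stream *strm)
    {
        lzma_mt mt = {
            .flags = LZMA_CONCATENATED,
            .threads = lzma_cputhreads(),             // 0 if unknown
            .memlimit_threading = lzma_physmem() / 4, // only limits thread count
            .memlimit_stop = UINT64_MAX,              // no hard limit
        };

        if (mt.threads == 0)
            mt.threads = 1;

        return lzma_stream_decoder_mt(strm, &mt);
    }
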
@@ -444,6 +491,60 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
lzma_nothrow lzma_attr_warn_unused_result;
+/**
+ * \brief MicroLZMA encoder
+ *
+ * The MicroLZMA format is a raw LZMA stream whose first byte (always 0x00)
+ * has been replaced with bitwise-negation of the LZMA properties (lc/lp/pb).
+ * This encoding ensures that the first byte of MicroLZMA stream is never
+ * 0x00. There is no end of payload marker and thus the uncompressed size
+ * must be stored separately. For the best error detection the dictionary
+ * size should be stored separately as well but alternatively one may use
+ * the uncompressed size as the dictionary size when decoding.
+ *
+ * With the MicroLZMA encoder, lzma_code() behaves slightly unusually.
+ * The action argument must be LZMA_FINISH and the return value will never be
+ * LZMA_OK. Thus the encoding is always done with a single lzma_code() after
+ * the initialization. The benefit of the combination of initialization
+ * function and lzma_code() is that memory allocations can be re-used for
+ * better performance.
+ *
+ * lzma_code() will try to encode as much input as is possible to fit into
+ * the given output buffer. If not all input can be encoded, the stream will
+ * be finished without encoding all the input. The caller must check both
+ * input and output buffer usage after lzma_code() (total_in and total_out
+ * in lzma_stream can be convenient). Often lzma_code() can fill the output
+ * buffer completely if there is a lot of input, but sometimes a few bytes
+ * may remain unused because the next LZMA symbol would require more space.
+ *
+ * lzma_stream.avail_out must be at least 6. Otherwise LZMA_PROG_ERROR
+ * will be returned.
+ *
+ * The LZMA dictionary should be reasonably low to speed up the encoder
+ * re-initialization. A good value is bigger than the resulting
+ * uncompressed size of most of the output chunks. For example, if output
+ * size is 4 KiB, dictionary size of 32 KiB or 64 KiB is good. If the
+ * data compresses extremely well, even 128 KiB may be useful.
+ *
+ * The MicroLZMA format and this encoder variant were made with the EROFS
+ * file system in mind. This format may be convenient in other embedded
+ * uses too where many small streams are needed. XZ Embedded includes a
+ * decoder for this format.
+ *
+ * \return - LZMA_STREAM_END: All good. Check the amounts of input used
+ * and output produced. Store the amount of input used
+ * (uncompressed size) as it needs to be known to decompress
+ * the data.
+ * - LZMA_OPTIONS_ERROR
+ * - LZMA_MEM_ERROR
+ * - LZMA_PROG_ERROR: In addition to the generic reasons for this
+ * error code, this may also be returned if there isn't enough
+ * output space (6 bytes) to create a valid MicroLZMA stream.
+ */
+extern LZMA_API(lzma_ret) lzma_microlzma_encoder(
+ lzma_stream *strm, const lzma_options_lzma *options);
+
+
/************
* Decoding *
************/
@@ -501,8 +602,8 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
/**
* This flag enables decoding of concatenated files with file formats that
* allow concatenating compressed files as is. From the formats currently
- * supported by liblzma, only the .xz format allows concatenated files.
- * Concatenated files are not allowed with the legacy .lzma format.
+ * supported by liblzma, only the .xz and .lz formats allow concatenated
+ * files. Concatenated files are not allowed with the legacy .lzma format.
*
* This flag also affects the usage of the `action' argument for lzma_code().
* When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END
@@ -516,6 +617,35 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
/**
+ * This flag makes the threaded decoder report errors (like LZMA_DATA_ERROR)
+ * as soon as they are detected. This saves time when the application has no
+ * interest in a partially decompressed truncated or corrupt file. Note that
+ * due to timing randomness, if the same truncated or corrupt input is
+ * decompressed multiple times with this flag, a different amount of output
+ * may be produced by different runs, and even the error code might vary.
+ *
+ * When using LZMA_FAIL_FAST, it is recommended to use LZMA_FINISH to tell
+ * the decoder when no more input will be coming because it can help fast
+ * detection and reporting of truncated files. Note that in this situation
+ * truncated files might be diagnosed with LZMA_DATA_ERROR instead of
+ * LZMA_OK or LZMA_BUF_ERROR!
+ *
+ * Without this flag the threaded decoder will provide as much output as
+ * possible at first and then report the pending error. This default behavior
+ * matches the single-threaded decoder and provides repeatable behavior
+ * with truncated or corrupt input. There are a few special cases where the
+ * behavior can still differ like memory allocation failures (LZMA_MEM_ERROR).
+ *
+ * Single-threaded decoders currently ignore this flag.
+ *
+ * Support for this flag was added in liblzma 5.3.3alpha. Note that in older
+ * versions this flag isn't supported (LZMA_OPTIONS_ERROR) even by functions
+ * that ignore this flag in newer liblzma versions.
+ */
+#define LZMA_FAIL_FAST UINT32_C(0x20)
+
+
+/**
* \brief Initialize .xz Stream decoder
*
* \param strm Pointer to properly prepared lzma_stream
@@ -526,7 +656,8 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
* had been specified.
* \param flags Bitwise-or of zero or more of the decoder flags:
* LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
- * LZMA_TELL_ANY_CHECK, LZMA_CONCATENATED
+ * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK,
+ * LZMA_CONCATENATED, LZMA_FAIL_FAST
*
* \return - LZMA_OK: Initialization was successful.
* - LZMA_MEM_ERROR: Cannot allocate memory.
@@ -539,11 +670,50 @@ extern LZMA_API(lzma_ret) lzma_stream_decoder(
/**
- * \brief Decode .xz Streams and .lzma files with autodetection
+ * \brief Initialize multithreaded .xz Stream decoder
+ *
+ * \param strm Pointer to properly prepared lzma_stream
+ * \param options Pointer to multithreaded compression options
+ *
+ * The decoder can decode multiple Blocks in parallel. This requires that each
+ * Block Header contains the Compressed Size and Uncompressed Size fields
+ * which are added by the multi-threaded encoder, see lzma_stream_encoder_mt().
+ *
+ * A Stream with one Block will only utilize one thread. A Stream with multiple
+ * Blocks but without size information in Block Headers will be processed in
+ * single-threaded mode in the same way as done by lzma_stream_decoder().
+ * Concatenated Streams are processed one Stream at a time; no inter-Stream
+ * parallelization is done.
+ *
+ * This function behaves like lzma_stream_decoder() when options->threads == 1
+ * and options->memlimit_threading <= 1.
+ *
+ * \return - LZMA_OK: Initialization was successful.
+ * - LZMA_MEM_ERROR: Cannot allocate memory.
+ * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached.
+ * - LZMA_OPTIONS_ERROR: Unsupported flags.
+ * - LZMA_PROG_ERROR
+ */
+extern LZMA_API(lzma_ret) lzma_stream_decoder_mt(
+ lzma_stream *strm, const lzma_mt *options)
+ lzma_nothrow lzma_attr_warn_unused_result;
+
+
+/**
+ * \brief Decode .xz, .lzma, and .lz (lzip) files with autodetection
+ *
+ * This decoder autodetects between the .xz, .lzma, and .lz file formats,
+ * and calls lzma_stream_decoder(), lzma_alone_decoder(), or
+ * lzma_lzip_decoder() once the type of the input file has been detected.
*
- * This decoder autodetects between the .xz and .lzma file formats, and
- * calls lzma_stream_decoder() or lzma_alone_decoder() once the type
- * of the input file has been detected.
+ * Support for .lz was added in 5.4.0.
+ *
+ * If the flag LZMA_CONCATENATED is used and the input is a .lzma file:
+ * For historical reasons concatenated .lzma files aren't supported.
+ * If there is trailing data after one .lzma stream, lzma_code() will
+ * return LZMA_DATA_ERROR. (lzma_alone_decoder() doesn't have such a check
+ * as it doesn't support any decoder flags. It will return LZMA_STREAM_END
+ * after one .lzma stream.)
*
* \param strm Pointer to properly prepared lzma_stream
* \param memlimit Memory usage limit as bytes. Use UINT64_MAX
@@ -551,7 +721,10 @@ extern LZMA_API(lzma_ret) lzma_stream_decoder(
* 5.2.3 and earlier don't allow 0 here and return
* LZMA_PROG_ERROR; later versions treat 0 as if 1
* had been specified.
- * \param flags Bitwise-or of flags, or zero for no flags.
+ * \param flags Bitwise-or of zero or more of the decoder flags:
+ * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
+ * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK,
+ * LZMA_CONCATENATED, LZMA_FAIL_FAST
*
* \return - LZMA_OK: Initialization was successful.
* - LZMA_MEM_ERROR: Cannot allocate memory.
@@ -587,6 +760,64 @@ extern LZMA_API(lzma_ret) lzma_alone_decoder(
/**
+ * \brief Initialize .lz (lzip) decoder (a foreign file format)
+ *
+ * \param strm Pointer to properly prepared lzma_stream
+ * \param memlimit Memory usage limit as bytes. Use UINT64_MAX
+ * to effectively disable the limiter.
+ * \param flags Bitwise-or of flags, or zero for no flags.
+ * All decoder flags listed above are supported
+ * although only LZMA_CONCATENATED and (in very rare
+ * cases) LZMA_IGNORE_CHECK are actually useful.
+ * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
+ * and LZMA_FAIL_FAST do nothing. LZMA_TELL_ANY_CHECK
+ * is supported for consistency only as CRC32 is
+ * always used in the .lz format.
+ *
+ * This decoder supports the .lz format version 0 and the unextended .lz
+ * format version 1:
+ *
+ * - Files in the format version 0 were produced by lzip 1.3 and older.
+ * Such files aren't common but may be found from file archives
+ * as a few source packages were released in this format. People
+ * might have old personal files in this format too. Decompression
+ * support for the format version 0 was removed in lzip 1.18.
+ *
+ * - lzip 1.3 added decompression support for .lz format version 1 files.
+ * Compression support was added in lzip 1.4. In lzip 1.6 the .lz format
+ * version 1 was extended to support the Sync Flush marker. This extension
+ * is not supported by liblzma. lzma_code() will return LZMA_DATA_ERROR
+ * at the location of the Sync Flush marker. In practice files with
+ * the Sync Flush marker are very rare and thus liblzma can decompress
+ * almost all .lz files.
+ *
+ * Just like with lzma_stream_decoder() for .xz files, LZMA_CONCATENATED
+ * should be used when decompressing normal standalone .lz files.
+ *
+ * The .lz format allows putting non-.lz data at the end of a file after at
+ * least one valid .lz member. That is, one can append custom data at the end
+ * of a .lz file and the decoder is required to ignore it. In liblzma this
+ * is relevant only when LZMA_CONCATENATED is used. In that case lzma_code()
+ * will return LZMA_STREAM_END and leave lzma_stream.next_in pointing to
+ * the first byte of the non-.lz data. An exception to this is if the first
+ * 1-3 bytes of the non-.lz data are identical to the .lz magic bytes
+ * (0x4C, 0x5A, 0x49, 0x50; "LZIP" in US-ASCII). In such a case the 1-3 bytes
+ * will have been ignored by lzma_code(). If one wishes to locate the non-.lz
+ * data reliably, one must ensure that the first byte isn't 0x4C. Actually
+ * one should ensure that none of the first four bytes of trailing data are
+ * equal to the magic bytes because lzip >= 1.20 requires it by default.
+ *
+ * \return - LZMA_OK: Initialization was successful.
+ * - LZMA_MEM_ERROR: Cannot allocate memory.
+ * - LZMA_OPTIONS_ERROR: Unsupported flags
+ * - LZMA_PROG_ERROR
+ */
+extern LZMA_API(lzma_ret) lzma_lzip_decoder(
+ lzma_stream *strm, uint64_t memlimit, uint32_t flags)
+ lzma_nothrow lzma_attr_warn_unused_result;
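
Initializing the .lz decoder looks just like the .xz case; a minimal sketch
for normal standalone .lz files:

    #include <lzma.h>

    static lzma_ret
    init_lzip_decoder(lzma_stream *strm)
    {
        // UINT64_MAX effectively disables the memory usage limiter.
        return lzma_lzip_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
    }
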
+
+
+/**
* \brief Single-call .xz Stream decoder
*
* \param memlimit Pointer to how much memory the decoder is allowed
@@ -595,7 +826,8 @@ extern LZMA_API(lzma_ret) lzma_alone_decoder(
* returned.
* \param flags Bitwise-or of zero or more of the decoder flags:
* LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
- * LZMA_CONCATENATED. Note that LZMA_TELL_ANY_CHECK
+ * LZMA_IGNORE_CHECK, LZMA_CONCATENATED,
+ * LZMA_FAIL_FAST. Note that LZMA_TELL_ANY_CHECK
* is not allowed and will return LZMA_PROG_ERROR.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
@@ -630,3 +862,43 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_decode(
const uint8_t *in, size_t *in_pos, size_t in_size,
uint8_t *out, size_t *out_pos, size_t out_size)
lzma_nothrow lzma_attr_warn_unused_result;
+
+
+/**
+ * \brief MicroLZMA decoder
+ *
+ * See lzma_microlzma_decoder() for more information.
+ *
+ * The lzma_code() usage with this decoder is completely normal. The
+ * special behavior of lzma_code() applies to lzma_microlzma_encoder() only.
+ *
+ * \param strm Pointer to properly prepared lzma_stream
+ * \param comp_size Compressed size of the MicroLZMA stream.
+ * The caller must somehow know this exactly.
+ * \param uncomp_size Uncompressed size of the MicroLZMA stream.
+ * If the exact uncompressed size isn't known, this
+ * can be set to a value that is at most as big as
+ * the exact uncompressed size would be, but then the
+ * next argument uncomp_size_is_exact must be false.
+ * \param uncomp_size_is_exact
+ * If true, uncomp_size must be exactly correct.
+ * This will improve error detection at the end of
+ * the stream. If the exact uncompressed size isn't
+ * known, this must be false. uncomp_size must still
+ * be at most as big as the exact uncompressed size
+ * is. Setting this to false when the exact size is
+ * known will work but error detection at the end of
+ * the stream will be weaker.
+ * \param dict_size LZMA dictionary size that was used when
+ * compressing the data. It is OK to use a bigger
+ * value too but liblzma will then allocate more
+ * memory than would actually be required and error
+ * detection will be slightly worse. (Note that with
+ * the implementation in XZ Embedded it doesn't
+ * affect the memory usage if one specifies bigger
+ * dictionary than actually required.)
+ */
+extern LZMA_API(lzma_ret) lzma_microlzma_decoder(
+ lzma_stream *strm, uint64_t comp_size,
+ uint64_t uncomp_size, lzma_bool uncomp_size_is_exact,
+ uint32_t dict_size);
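
A hedged compression sketch for the MicroLZMA pair above; the dictionary size
and preset are arbitrary and the caller is expected to record *in_used so that
lzma_microlzma_decoder() can later be given the uncompressed size:

    #include <lzma.h>

    static lzma_ret
    microlzma_compress(const uint8_t *in, size_t in_size,
            uint8_t *out, size_t out_size,
            size_t *in_used, size_t *out_used)
    {
        lzma_options_lzma opt;
        if (lzma_lzma_preset(&opt, LZMA_PRESET_DEFAULT))
            return LZMA_OPTIONS_ERROR;

        opt.dict_size = 64 << 10; // small dictionary as recommended above

        lzma_stream strm = LZMA_STREAM_INIT;
        lzma_ret ret = lzma_microlzma_encoder(&strm, &opt);
        if (ret != LZMA_OK)
            return ret;

        strm.next_in = in;
        strm.avail_in = in_size;
        strm.next_out = out;
        strm.avail_out = out_size; // must be at least 6

        // The action must be LZMA_FINISH; LZMA_STREAM_END means success.
        ret = lzma_code(&strm, LZMA_FINISH);

        *in_used = (size_t)strm.total_in;
        *out_used = (size_t)strm.total_out;
        lzma_end(&strm);
        return ret;
    }
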
diff --git a/contrib/libs/lzma/liblzma/api/lzma/filter.h b/contrib/libs/lzma/liblzma/api/lzma/filter.h
index 8c85931476..58c83bf7ed 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/filter.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/filter.h
@@ -108,7 +108,9 @@ extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id)
* need to be initialized by the caller in any way.
*
* If an error occurs, memory possibly already allocated by this function
- * is always freed.
+ * is always freed. liblzma versions older than 5.2.7 may modify the dest
+ * array and leave its contents in an undefined state if an error occurs.
+ * liblzma 5.2.7 and newer only modify the dest array when returning LZMA_OK.
*
* \return - LZMA_OK
* - LZMA_MEM_ERROR
@@ -118,7 +120,29 @@ extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id)
*/
extern LZMA_API(lzma_ret) lzma_filters_copy(
const lzma_filter *src, lzma_filter *dest,
- const lzma_allocator *allocator) lzma_nothrow;
+ const lzma_allocator *allocator)
+ lzma_nothrow lzma_attr_warn_unused_result;
+
+
+/**
+ * \brief Free the options in the array of lzma_filter structures
+ *
+ * This frees the filter chain options. The filters array itself is not freed.
+ *
+ * The filters array must have at most LZMA_FILTERS_MAX + 1 elements
+ * including the terminating element which must have .id = LZMA_VLI_UNKNOWN.
+ * For all elements before the terminating element:
+ * - options will be freed using the given lzma_allocator or,
+ * if allocator is NULL, using free().
+ * - options will be set to NULL.
+ * - id will be set to LZMA_VLI_UNKNOWN.
+ *
+ * If filters is NULL, this does nothing but remember that this never frees
+ * the filters array itself.
+ */
+extern LZMA_API(void) lzma_filters_free(
+ lzma_filter *filters, const lzma_allocator *allocator)
+ lzma_nothrow;
/**
@@ -202,21 +226,27 @@ extern LZMA_API(lzma_ret) lzma_raw_decoder(
/**
* \brief Update the filter chain in the encoder
*
- * This function is for advanced users only. This function has two slightly
- * different purposes:
+ * This function may be called after lzma_code() has returned LZMA_STREAM_END
+ * when LZMA_FULL_BARRIER, LZMA_FULL_FLUSH, or LZMA_SYNC_FLUSH was used:
*
- * - After LZMA_FULL_FLUSH when using Stream encoder: Set a new filter
- * chain, which will be used starting from the next Block.
+ * - After LZMA_FULL_BARRIER or LZMA_FULL_FLUSH: Single-threaded .xz Stream
+ * encoder (lzma_stream_encoder()) and (since liblzma 5.4.0) multi-threaded
+ * Stream encoder (lzma_stream_encoder_mt()) allow setting a new filter
+ * chain to be used for the next Block(s).
*
- * - After LZMA_SYNC_FLUSH using Raw, Block, or Stream encoder: Change
- * the filter-specific options in the middle of encoding. The actual
- * filters in the chain (Filter IDs) cannot be changed. In the future,
- * it might become possible to change the filter options without
- * using LZMA_SYNC_FLUSH.
+ * - After LZMA_SYNC_FLUSH: Raw encoder (lzma_raw_encoder()),
+ * Block encoder (lzma_block_encoder()), and single-threaded .xz Stream
+ * encoder (lzma_stream_encoder()) allow changing certain filter-specific
+ * options in the middle of encoding. The actual filters in the chain
+ * (Filter IDs) must not be changed! Currently only the lc, lp, and pb
+ * options of LZMA2 (not LZMA1) can be changed this way.
*
- * While rarely useful, this function may be called also when no data has
- * been compressed yet. In that case, this function will behave as if
- * LZMA_FULL_FLUSH (Stream encoder) or LZMA_SYNC_FLUSH (Raw or Block
+ * - In the future some filters might allow changing some of their options
+ * without any barrier or flushing but currently such filters don't exist.
+ *
+ * This function may also be called when no data has been compressed yet
+ * although this is rarely useful. In that case, this function will behave
+ * as if LZMA_FULL_FLUSH (Stream encoders) or LZMA_SYNC_FLUSH (Raw or Block
* encoder) had been used right before calling this function.
*
* \return - LZMA_OK
@@ -424,3 +454,261 @@ extern LZMA_API(lzma_ret) lzma_filter_flags_decode(
lzma_filter *filter, const lzma_allocator *allocator,
const uint8_t *in, size_t *in_pos, size_t in_size)
lzma_nothrow lzma_attr_warn_unused_result;
+
+
+/***********
+ * Strings *
+ ***********/
+
+/**
+ * \brief Allow or show all filters
+ *
+ * By default only the filters supported in the .xz format are accepted by
+ * lzma_str_to_filters() or shown by lzma_str_list_filters().
+ */
+#define LZMA_STR_ALL_FILTERS UINT32_C(0x01)
+
+
+/**
+ * \brief Do not validate the filter chain in lzma_str_to_filters()
+ *
+ * By default lzma_str_to_filters() can return an error if the filter chain
+ * as a whole isn't usable in the .xz format or in the raw encoder or decoder.
+ * With this flag the validation is skipped (this doesn't affect the handling
+ * of the individual filter options).
+ */
+#define LZMA_STR_NO_VALIDATION UINT32_C(0x02)
+
+
+/**
+ * \brief Stringify encoder options
+ *
+ * Show the filter-specific options that the encoder will use.
+ * This may be useful for verbose diagnostic messages.
+ *
+ * Note that if options were decoded from .xz headers then the encoder options
+ * may be undefined. This flag shouldn't be used in such a situation.
+ */
+#define LZMA_STR_ENCODER UINT32_C(0x10)
+
+
+/**
+ * \brief Stringify decoder options
+ *
+ * Show the filter-specific options that the decoder will use.
+ * This may be useful for showing what filter options were decoded
+ * from file headers.
+ */
+#define LZMA_STR_DECODER UINT32_C(0x20)
+
+
+/**
+ * \brief Produce xz-compatible getopt_long() syntax
+ *
+ * That is, "delta:dist=2 lzma2:dict=4MiB,pb=1,lp=1" becomes
+ * "--delta=dist=2 --lzma2=dict=4MiB,pb=1,lp=1".
+ *
+ * This syntax is compatible with xz 5.0.0 as long as the filters and
+ * their options are supported too.
+ */
+#define LZMA_STR_GETOPT_LONG UINT32_C(0x40)
+
+
+/**
+ * \brief Use two dashes "--" instead of a space to separate filters
+ *
+ * That is, "delta:dist=2 lzma2:pb=1,lp=1" becomes
+ * "delta:dist=2--lzma2:pb=1,lp=1". This looks slightly odd but this
+ * kind of strings should be usable on the command line without quoting.
+ * However, it is possible that future versions with new filter options
+ * might produce strings that require shell quoting anyway as the exact
+ * set of possible characters isn't frozen for now.
+ *
+ * It is guaranteed that the single quote (') will never be used in
+ * filter chain strings (even if LZMA_STR_NO_SPACES isn't used).
+ */
+#define LZMA_STR_NO_SPACES UINT32_C(0x80)
+
+
+/**
+ * \brief Convert a string to a filter chain
+ *
+ * This tries to make it easier to write applications that allow users
+ * to set custom compression options. This only handles the filter
+ * configuration (including presets) but not the number of threads,
+ * block size, check type, or memory limits.
+ *
+ * The input string can be either a preset or a filter chain. Presets
+ * begin with a digit 0-9 and may be followed by zero or more flags
+ * which are lower-case letters. Currently only "e" is supported, matching
+ * LZMA_PRESET_EXTREME. For partial xz command line syntax compatibility,
+ * a preset string may start with a single dash "-".
+ *
+ * A filter chain consists of one or more "filtername:opt1=value1,opt2=value2"
+ * strings separated by one or more spaces. Leading and trailing spaces are
+ * ignored. All names and values must be lower-case. Extra commas in the
+ * option list are ignored. The order of filters is significant: when
+ * encoding, the uncompressed input data goes to the leftmost filter first.
+ * Normally "lzma2" is the last filter in the chain.
+ *
+ * If one wishes to avoid spaces, for example, to avoid shell quoting,
+ * it is possible to use two dashes "--" instead of spaces to separate
+ * the filters.
+ *
+ * For xz command line compatibility, each filter may be prefixed with
+ * two dashes "--" and the colon ":" separating the filter name from
+ * the options may be replaced with an equals sign "=".
+ *
+ * By default, only filters that can be used in the .xz format are accepted.
+ * To allow all filters (LZMA1) use the flag LZMA_STR_ALL_FILTERS.
+ *
+ * By default, very basic validation is done for the filter chain as a whole,
+ * for example, that LZMA2 is only used as the last filter in the chain.
+ * The validation isn't perfect though and it's possible that this function
+ * succeeds but using the filter chain for encoding or decoding will still
+ * result in LZMA_OPTIONS_ERROR. To disable this validation, use the flag
+ * LZMA_STR_NO_VALIDATION.
+ *
+ * The available filter names and their options are available via
+ * lzma_str_list_filters(). See the xz man page for the description
+ * of filter names and options.
+ *
+ * \param str User-supplied string describing a preset or
+ * a filter chain. If a default value is needed and
+ * you don't know what would be good, use "6" since
+ * that is the default preset in xz too.
+ * \param error_pos If this isn't NULL, this value will be set on
+ * both success and on all errors. This tells the
+ * location of the error in the string. This is
+ * an int to make it straightforward to use this
+ * as printf() field width. The value is guaranteed
+ * to be in the range [0, INT_MAX] even if strlen(str)
+ * somehow was greater than INT_MAX.
+ * \param filters An array of lzma_filter structures. There must
+ * be LZMA_FILTERS_MAX + 1 (that is, five) elements
+ * in the array. The old contents are ignored so it
+ * doesn't need to be initialized. This array is
+ * modified only if this function returns LZMA_OK.
+ * Once the allocated filter options are no longer
+ * needed, lzma_filters_free() can be used to free the
+ * options (it doesn't free the filters array itself).
+ * \param flags Bitwise-or of zero or more of the flags
+ * LZMA_STR_ALL_FILTERS and LZMA_STR_NO_VALIDATION.
+ * \param allocator lzma_allocator for custom allocator functions.
+ * Set to NULL to use malloc() and free().
+ *
+ * \return On success, NULL is returned. On error, a statically-allocated
+ * error message is returned which together with the error_pos
+ * should give some idea what is wrong.
+ *
+ * For command line applications, below is an example how an error message
+ * can be displayed. Note the use of an empty string for the field width.
+ * If "^" was used there it would create an off-by-one error except at
+ * the very beginning of the line.
+ *
+ * \code{.c}
+ * const char *str = ...; // From user
+ * lzma_filter filters[LZMA_FILTERS_MAX + 1];
+ * int pos;
+ * const char *msg = lzma_str_to_filters(str, &pos, filters, 0, NULL);
+ * if (msg != NULL) {
+ * printf("%s: Error in XZ compression options:\n", argv[0]);
+ * printf("%s: %s\n", argv[0], str);
+ * printf("%s: %*s^\n", argv[0], errpos, "");
+ * printf("%s: %s\n", argv[0], msg);
+ * }
+ * \endcode
+ */
+extern LZMA_API(const char *) lzma_str_to_filters(
+ const char *str, int *error_pos, lzma_filter *filters,
+ uint32_t flags, const lzma_allocator *allocator)
+ lzma_nothrow lzma_attr_warn_unused_result;
+
+
+/**
+ * \brief Convert a filter chain to a string
+ *
+ * Use cases:
+ *
+ * - Verbose output showing the full encoder options to the user
+ * (use LZMA_STR_ENCODER in flags)
+ *
+ * - Showing the filters and options that are required to decode a file
+ * (use LZMA_STR_DECODER in flags)
+ *
+ * - Showing the filter names without any options in informational messages
+ * where the technical details aren't important (no flags). In this case
+ * the .options in the filters array are ignored and may be NULL even if
+ * a filter has a mandatory options structure.
+ *
+ * Note that even if the filter chain was specified using a preset,
+ * the resulting filter chain isn't reversed to a preset. So if you
+ * specify "6" to lzma_str_to_filters() then lzma_str_from_filters()
+ * will produce a string containing "lzma2".
+ *
+ * \param str On success *str will be set to point to an
+ * allocated string describing the given filter
+ * chain. Old value is ignored. On error *str is
+ * always set to NULL.
+ * \param filters Array of 1-4 filters and a terminating element
+ * with .id = LZMA_VLI_UNKNOWN.
+ * \param flags Bitwise-or of zero or more of the flags
+ * LZMA_STR_ENCODER, LZMA_STR_DECODER,
+ * LZMA_STR_GETOPT_LONG, and LZMA_STR_NO_SPACES.
+ * \param allocator lzma_allocator for custom allocator functions.
+ * Set to NULL to use malloc() and free().
+ *
+ * \return - LZMA_OK
+ * - LZMA_OPTIONS_ERROR: Empty filter chain
+ * (filters[0].id == LZMA_VLI_UNKNOWN) or the filter chain
+ * includes a Filter ID that is not supported by this function.
+ * - LZMA_MEM_ERROR
+ * - LZMA_PROG_ERROR
+ */
+extern LZMA_API(lzma_ret) lzma_str_from_filters(
+ char **str, const lzma_filter *filters, uint32_t flags,
+ const lzma_allocator *allocator)
+ lzma_nothrow lzma_attr_warn_unused_result;
+
+
+/**
+ * \brief List available filters and/or their options (for help message)
+ *
+ * If a filter_id is given then only one line is created which contains the
+ * filter name. If LZMA_STR_ENCODER or LZMA_STR_DECODER is used then the
+ * options required for encoding or decoding are listed on the same line too.
+ *
+ * If filter_id is LZMA_VLI_UNKNOWN then all supported .xz-compatible filters
+ * are listed:
+ *
+ * - If neither LZMA_STR_ENCODER nor LZMA_STR_DECODER is used then
+ * the supported filter names are listed on a single line separated
+ * by spaces.
+ *
+ * - If LZMA_STR_ENCODER or LZMA_STR_DECODER is used then filters and
+ * the supported options are listed one filter per line. There won't
+ * be a '\n' after the last filter.
+ *
+ * - If LZMA_STR_ALL_FILTERS is used then the list will include also
+ * those filters that cannot be used in the .xz format (LZMA1).
+ *
+ * \param str On success *str will be set to point to an
+ * allocated string listing the filters and options.
+ * Old value is ignored. On error *str is always set
+ * to NULL.
+ * \param filter_id Filter ID or LZMA_VLI_UNKNOWN.
+ * \param flags Bitwise-or of zero or more of the flags
+ * LZMA_STR_ALL_FILTERS, LZMA_STR_ENCODER,
+ * LZMA_STR_DECODER, and LZMA_STR_GETOPT_LONG.
+ * \param allocator lzma_allocator for custom allocator functions.
+ * Set to NULL to use malloc() and free().
+ *
+ * \return - LZMA_OK
+ * - LZMA_OPTIONS_ERROR: Unsupported filter_id or flags
+ * - LZMA_MEM_ERROR
+ * - LZMA_PROG_ERROR
+ */
+extern LZMA_API(lzma_ret) lzma_str_list_filters(
+ char **str, lzma_vli filter_id, uint32_t flags,
+ const lzma_allocator *allocator)
+ lzma_nothrow lzma_attr_warn_unused_result;
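
A sketch tying the string API together: parse a user-supplied string into a
filter chain, print the resulting encoder options, and free the allocated
filter options:

    #include <lzma.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int
    show_filter_chain(const char *user_str)
    {
        lzma_filter filters[LZMA_FILTERS_MAX + 1];
        int pos;

        const char *msg = lzma_str_to_filters(user_str, &pos, filters, 0, NULL);
        if (msg != NULL) {
            fprintf(stderr, "error at position %d: %s\n", pos, msg);
            return 1;
        }

        char *str = NULL;
        if (lzma_str_from_filters(&str, filters, LZMA_STR_ENCODER, NULL)
                == LZMA_OK) {
            printf("%s\n", str); // e.g. "lzma2:dict=8MiB,..." for input "6"
            free(str);           // allocated with malloc() when allocator is NULL
        }

        lzma_filters_free(filters, NULL);
        return 0;
    }
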
diff --git a/contrib/libs/lzma/liblzma/api/lzma/hardware.h b/contrib/libs/lzma/liblzma/api/lzma/hardware.h
index 47481f2581..7040aae45f 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/hardware.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/hardware.h
@@ -57,7 +57,7 @@ extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow;
* If the hardware supports more than one thread per CPU core, the number
* of hardware threads is returned if that information is available.
*
- * \brief On success, the number of available CPU threads or cores is
+ * \return On success, the number of available CPU threads or cores is
* returned. If this information isn't available or an error
* occurs, zero is returned.
*/
diff --git a/contrib/libs/lzma/liblzma/api/lzma/index.h b/contrib/libs/lzma/liblzma/api/lzma/index.h
index 3dac6fb85c..144d416615 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/index.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/index.h
@@ -684,3 +684,69 @@ extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i,
uint64_t *memlimit, const lzma_allocator *allocator,
const uint8_t *in, size_t *in_pos, size_t in_size)
lzma_nothrow;
+
+
+/**
+ * \brief Initialize a .xz file information decoder
+ *
+ * \param strm Pointer to a properly prepared lzma_stream
+ * \param dest_index Pointer to a pointer where the decoder will put
+ * the decoded lzma_index. The old value
+ * of *dest_index is ignored (not freed).
+ * \param memlimit How much memory the resulting lzma_index is
+ * allowed to require. Use UINT64_MAX to
+ * effectively disable the limiter.
+ * \param file_size Size of the input .xz file
+ *
+ * This decoder decodes the Stream Header, Stream Footer, Index, and
+ * Stream Padding field(s) from the input .xz file and stores the resulting
+ * combined index in *dest_index. This information can be used to get the
+ * uncompressed file size with lzma_index_uncompressed_size(*dest_index) or,
+ * for example, to implement random access reading by locating the Blocks
+ * in the Streams.
+ *
+ * To get the required information from the .xz file, lzma_code() may ask
+ * the application to seek in the input file by returning LZMA_SEEK_NEEDED
+ * and having the target file position specified in lzma_stream.seek_pos.
+ * The number of seeks required depends on the input file and how big buffers
+ * the application provides. When possible, the decoder will seek backward
+ * and forward in the given buffer to avoid useless seek requests. Thus, if
+ * the application provides the whole file at once, no external seeking will
+ * be required (that is, lzma_code() won't return LZMA_SEEK_NEEDED).
+ *
+ * The value in lzma_stream.total_in can be used to estimate how much data
+ * liblzma had to read to get the file information. However, due to seeking
+ * and the way total_in is updated, the value of total_in will be somewhat
+ * inaccurate (a little too big). Thus, total_in is a good estimate but don't
+ * expect to see the same exact value for the same file if you change the
+ * input buffer size or switch to a different liblzma version.
+ *
+ * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH.
+ * You only need to use LZMA_RUN; LZMA_FINISH is only supported because it
+ * might be convenient for some applications. If you use LZMA_FINISH and if
+ * lzma_code() asks the application to seek, remember to reset `action' back
+ * to LZMA_RUN unless you hit the end of the file again.
+ *
+ * Possible return values from lzma_code():
+ * - LZMA_OK: All OK so far, more input needed
+ * - LZMA_SEEK_NEEDED: Provide more input starting from the absolute
+ * file position strm->seek_pos
+ * - LZMA_STREAM_END: Decoding was successful, *dest_index has been set
+ * - LZMA_FORMAT_ERROR: The input file is not in the .xz format (the
+ * expected magic bytes were not found from the beginning of the file)
+ * - LZMA_OPTIONS_ERROR: File looks valid but contains headers that aren't
+ * supported by this version of liblzma
+ * - LZMA_DATA_ERROR: File is corrupt
+ * - LZMA_BUF_ERROR
+ * - LZMA_MEM_ERROR
+ * - LZMA_MEMLIMIT_ERROR
+ * - LZMA_PROG_ERROR
+ *
+ * \return - LZMA_OK
+ * - LZMA_MEM_ERROR
+ * - LZMA_PROG_ERROR
+ */
+extern LZMA_API(lzma_ret) lzma_file_info_decoder(
+ lzma_stream *strm, lzma_index **dest_index,
+ uint64_t memlimit, uint64_t file_size)
+ lzma_nothrow;
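
A hedged sketch of the read loop with the LZMA_SEEK_NEEDED handling described
above; plain stdio is assumed and error handling is kept minimal:

    #include <lzma.h>
    #include <stdio.h>

    static lzma_index *
    load_xz_index(FILE *f, uint64_t file_size)
    {
        lzma_stream strm = LZMA_STREAM_INIT;
        lzma_index *idx = NULL;

        if (lzma_file_info_decoder(&strm, &idx, UINT64_MAX, file_size) != LZMA_OK)
            return NULL;

        uint8_t buf[BUFSIZ];
        lzma_ret ret = LZMA_OK;
        strm.avail_in = 0;

        while (ret != LZMA_STREAM_END) {
            if (strm.avail_in == 0) {
                strm.next_in = buf;
                strm.avail_in = fread(buf, 1, sizeof(buf), f);
            }

            ret = lzma_code(&strm, LZMA_RUN);

            if (ret == LZMA_SEEK_NEEDED) {
                // Continue reading from the position liblzma asked for.
                if (fseeko(f, (off_t)strm.seek_pos, SEEK_SET) != 0)
                    break;
                strm.avail_in = 0;
                ret = LZMA_OK;
            } else if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
                break;
            }
        }

        lzma_end(&strm);
        return ret == LZMA_STREAM_END ? idx : NULL; // free with lzma_index_end()
    }
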
diff --git a/contrib/libs/lzma/liblzma/api/lzma/index_hash.h b/contrib/libs/lzma/liblzma/api/lzma/index_hash.h
index 9287f1dfdb..8ff4163365 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/index_hash.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/index_hash.h
@@ -52,7 +52,7 @@ extern LZMA_API(void) lzma_index_hash_end(
/**
* \brief Add a new Record to an Index hash
*
- * \param index Pointer to a lzma_index_hash structure
+ * \param index_hash Pointer to a lzma_index_hash structure
* \param unpadded_size Unpadded Size of a Block
* \param uncompressed_size Uncompressed Size of a Block
*
diff --git a/contrib/libs/lzma/liblzma/api/lzma/lzma12.h b/contrib/libs/lzma/liblzma/api/lzma/lzma12.h
index df5f23b61a..d34e78392b 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/lzma12.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/lzma12.h
@@ -18,18 +18,41 @@
/**
- * \brief LZMA1 Filter ID
+ * \brief LZMA1 Filter ID (for raw encoder/decoder only, not in .xz)
*
* LZMA1 is the very same thing as what was called just LZMA in LZMA Utils,
* 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from
* accidentally using LZMA when they actually want LZMA2.
- *
- * LZMA1 shouldn't be used for new applications unless you _really_ know
- * what you are doing. LZMA2 is almost always a better choice.
*/
#define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001)
/**
+ * \brief LZMA1 Filter ID with extended options (for raw encoder/decoder)
+ *
+ * This is like LZMA_FILTER_LZMA1 but with this ID a few extra options
+ * are supported in the lzma_options_lzma structure:
+ *
+ * - A flag to tell the encoder if the end of payload marker (EOPM) alias
+ * end of stream (EOS) marker must be written at the end of the stream.
+ * In contrast, LZMA_FILTER_LZMA1 always writes the end marker.
+ *
+ * - Decoder needs to be told the uncompressed size of the stream
+ * or that it is unknown (using the special value UINT64_MAX).
+ * If the size is known, a flag can be set to allow the presence of
+ * the end marker anyway. In contrast, LZMA_FILTER_LZMA1 always
+ * behaves as if the uncompressed size was unknown.
+ *
+ * This allows handling file formats where LZMA1 streams are used but where
+ * the end marker isn't allowed or where it might not (always) be present.
+ * This extended LZMA1 functionality is provided as a Filter ID for raw
+ * encoder and decoder instead of adding new encoder and decoder initialization
+ * functions because this way it is possible to also use extra filters,
+ * for example, LZMA_FILTER_X86 in a filter chain with LZMA_FILTER_LZMA1EXT,
+ * which might be needed to handle some file formats.
+ */
+#define LZMA_FILTER_LZMA1EXT LZMA_VLI_C(0x4000000000000002)
+
+/**
* \brief LZMA2 Filter ID
*
* Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds
@@ -374,6 +397,82 @@ typedef struct {
*/
uint32_t depth;
+ /**
+ * \brief For LZMA_FILTER_LZMA1EXT: Extended flags
+ *
+ * This is used only with LZMA_FILTER_LZMA1EXT.
+ *
+ * Currently only one flag is supported, LZMA_LZMA1EXT_ALLOW_EOPM:
+ *
+ * - Encoder: If the flag is set, then end marker is written just
+ * like it is with LZMA_FILTER_LZMA1. Without this flag the
+ * end marker isn't written and the application has to store
+ * the uncompressed size somewhere outside the compressed stream.
+ * To decompress streams without the end marker, the application
+ * has to set the correct uncompressed size in ext_size_low and
+ * ext_size_high.
+ *
+ * - Decoder: If the uncompressed size in ext_size_low and
+ * ext_size_high is set to the special value UINT64_MAX
+ * (indicating unknown uncompressed size) then this flag is
+ * ignored and the end marker must always be present, that is,
+ * the behavior is identical to LZMA_FILTER_LZMA1.
+ *
+ * Otherwise, if this flag isn't set, then the input stream
+ * must not have the end marker; if the end marker is detected
+ * then it will result in LZMA_DATA_ERROR. This is useful when
+ * it is known that the stream must not have the end marker and
+ * strict validation is wanted.
+ *
+ * If this flag is set, then it is autodetected if the end marker
+ * is present after the specified number of uncompressed bytes
+ * has been decompressed (ext_size_low and ext_size_high). The
+ * end marker isn't allowed in any other position. This behavior
+ * is useful when uncompressed size is known but the end marker
+ * may or may not be present. This is the case, for example,
+ * in .7z files (valid .7z files that have the end marker in
+ * LZMA1 streams are rare but they do exist).
+ */
+ uint32_t ext_flags;
+# define LZMA_LZMA1EXT_ALLOW_EOPM UINT32_C(0x01)
+
+ /**
+ * \brief For LZMA_FILTER_LZMA1EXT: Uncompressed size (low bits)
+ *
+ * The 64-bit uncompressed size is needed for decompression with
+ * LZMA_FILTER_LZMA1EXT. The size is ignored by the encoder.
+ *
+ * The special value UINT64_MAX indicates that the uncompressed size
+ * is unknown and that the end of payload marker (also known as
+ * end of stream marker) must be present to indicate the end of
+ * the LZMA1 stream. Any other value indicates the expected
+ * uncompressed size of the LZMA1 stream. (If LZMA1 was used together
+ * with filters that change the size of the data then the uncompressed
+ * size of the LZMA1 stream could be different than the final
+ * uncompressed size of the filtered stream.)
+ *
+ * ext_size_low holds the least significant 32 bits of the
+ * uncompressed size. The most significant 32 bits must be set
+ * in ext_size_high. The macro lzma_set_ext_size(opt_lzma, u64size)
+ * can be used to set these members.
+ *
+ * The 64-bit uncompressed size is split into two uint32_t variables
+ * because there were no reserved uint64_t members and using the
+ * same options structure for LZMA_FILTER_LZMA1, LZMA_FILTER_LZMA1EXT,
+ * and LZMA_FILTER_LZMA2 was otherwise more convenient than having
+ * a new options structure for LZMA_FILTER_LZMA1EXT. (Replacing two
+ * uint32_t members with one uint64_t changes the ABI on some systems
+ * as the alignment of this struct can increase from 4 bytes to 8.)
+ */
+ uint32_t ext_size_low;
+
+ /**
+ * \brief For LZMA_FILTER_LZMA1EXT: Uncompressed size (high bits)
+ *
+ * This holds the most significant 32 bits of the uncompressed size.
+ */
+ uint32_t ext_size_high;
+
/*
* Reserved space to allow possible future extensions without
* breaking the ABI. You should not touch these, because the names
@@ -381,9 +480,6 @@ typedef struct {
* with the currently supported options, so it is safe to leave these
* uninitialized.
*/
- uint32_t reserved_int1;
- uint32_t reserved_int2;
- uint32_t reserved_int3;
uint32_t reserved_int4;
uint32_t reserved_int5;
uint32_t reserved_int6;
@@ -400,6 +496,19 @@ typedef struct {
/**
+ * \brief Macro to set the 64-bit uncompressed size in ext_size_*
+ *
+ * This might be convenient when decoding using LZMA_FILTER_LZMA1EXT.
+ * This isn't used with LZMA_FILTER_LZMA1 or LZMA_FILTER_LZMA2.
+ */
+#define lzma_set_ext_size(opt_lzma2, u64size) \
+do { \
+ (opt_lzma2).ext_size_low = (uint32_t)(u64size); \
+ (opt_lzma2).ext_size_high = (uint32_t)((uint64_t)(u64size) >> 32); \
+} while (0)
+
+
+/**
* \brief Set a compression preset to lzma_options_lzma structure
*
* 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9
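
A sketch of setting up a raw decoder for a headerless LZMA1 stream of known
size using LZMA_FILTER_LZMA1EXT; lc/lp/pb and dict_size below are only the
common defaults and must really come from the container format:

    #include <lzma.h>

    static lzma_ret
    init_lzma1ext_decoder(lzma_stream *strm, lzma_options_lzma *opt,
            uint64_t uncompressed_size)
    {
        if (lzma_lzma_preset(opt, LZMA_PRESET_DEFAULT))
            return LZMA_OPTIONS_ERROR;

        // Typical "lc=3, lp=0, pb=2" properties and an 8 MiB dictionary.
        opt->lc = 3;
        opt->lp = 0;
        opt->pb = 2;
        opt->dict_size = 8 << 20;

        // No end marker expected: decoding stops after uncompressed_size
        // bytes. Set LZMA_LZMA1EXT_ALLOW_EOPM if the marker may be present.
        opt->ext_flags = 0;
        lzma_set_ext_size(*opt, uncompressed_size);

        lzma_filter filters[] = {
            { .id = LZMA_FILTER_LZMA1EXT, .options = opt },
            { .id = LZMA_VLI_UNKNOWN,     .options = NULL },
        };

        return lzma_raw_decoder(strm, filters);
    }
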
diff --git a/contrib/libs/lzma/liblzma/api/lzma/version.h b/contrib/libs/lzma/liblzma/api/lzma/version.h
index 2bf3eaed24..7d37130f24 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/version.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/version.h
@@ -21,8 +21,8 @@
* Version number split into components
*/
#define LZMA_VERSION_MAJOR 5
-#define LZMA_VERSION_MINOR 2
-#define LZMA_VERSION_PATCH 5
+#define LZMA_VERSION_MINOR 4
+#define LZMA_VERSION_PATCH 0
#define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE
#ifndef LZMA_VERSION_COMMIT
diff --git a/contrib/libs/lzma/liblzma/api/lzma/vli.h b/contrib/libs/lzma/liblzma/api/lzma/vli.h
index 1b7a952a40..7a0a803dfc 100644
--- a/contrib/libs/lzma/liblzma/api/lzma/vli.h
+++ b/contrib/libs/lzma/liblzma/api/lzma/vli.h
@@ -159,6 +159,8 @@ extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *vli, size_t *vli_pos,
/**
* \brief Get the number of bytes required to encode a VLI
*
+ * \param vli Integer whose encoded size is to be determined
+ *
* \return Number of bytes on success (1-9). If vli isn't valid,
* zero is returned.
*/
diff --git a/contrib/libs/lzma/liblzma/check/crc64_fast.c b/contrib/libs/lzma/liblzma/check/crc64_fast.c
index 8af54cda7b..e3cbf1b1e9 100644
--- a/contrib/libs/lzma/liblzma/check/crc64_fast.c
+++ b/contrib/libs/lzma/liblzma/check/crc64_fast.c
@@ -3,11 +3,25 @@
/// \file crc64.c
/// \brief CRC64 calculation
///
-/// Calculate the CRC64 using the slice-by-four algorithm. This is the same
-/// idea that is used in crc32_fast.c, but for CRC64 we use only four tables
+/// There are two methods in this file. crc64_generic uses
+/// the slice-by-four algorithm. This is the same idea that is
+/// used in crc32_fast.c, but for CRC64 we use only four tables
/// instead of eight to avoid increasing CPU cache usage.
+///
+/// crc64_clmul uses 32/64-bit x86 SSSE3, SSE4.1, and CLMUL instructions.
+/// It was derived from
+/// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+/// and the public domain code from https://github.com/rawrunprotected/crc
+/// (URLs were checked on 2022-11-07).
+///
+/// FIXME: Builds for 32-bit x86 use crc64_x86.S by default instead
+/// of this file and thus the CLMUL version isn't available on 32-bit x86
+/// unless configured with --disable-assembler. Even then the lookup table
+/// isn't omitted in crc64_table.c since it doesn't know that assembly
+/// code has been disabled.
//
-// Author: Lasse Collin
+// Authors: Lasse Collin
+// Ilya Kurdyukov
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
@@ -15,6 +29,54 @@
///////////////////////////////////////////////////////////////////////////////
#include "check.h"
+
+#undef CRC_GENERIC
+#undef CRC_CLMUL
+#undef CRC_USE_GENERIC_FOR_SMALL_INPUTS
+
+// If CLMUL cannot be used then only the generic slice-by-four is built.
+#if !defined(HAVE_USABLE_CLMUL)
+# define CRC_GENERIC 1
+
+// If CLMUL is allowed unconditionally in the compiler options then the
+// generic version can be omitted. Note that this doesn't work with MSVC
+// as I don't know how to detect the features here.
+//
+// NOTE: Keep this in sync with crc64_table.c.
+#elif (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \
+ || (defined(__e2k__) && __iset__ >= 6)
+# define CRC_CLMUL 1
+
+// Otherwise build both and detect at runtime which version to use.
+#else
+# define CRC_GENERIC 1
+# define CRC_CLMUL 1
+
+/*
+ // The generic code is much faster with 1-8-byte inputs and has
+ // similar performance up to 16 bytes at least in microbenchmarks
+ // (it depends on input buffer alignment too). If both versions are
+ // built, this #define will use the generic version for inputs up to
+ // 16 bytes and CLMUL for bigger inputs. It saves a little in code
+ // size since the special cases for 0-16-byte inputs will be omitted
+ // from the CLMUL code.
+# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1
+*/
+
+# if defined(_MSC_VER)
+# include <intrin.h>
+# elif defined(HAVE_CPUID_H)
+# include <cpuid.h>
+# endif
+#endif
+
+
+/////////////////////////////////
+// Generic slice-by-four CRC64 //
+/////////////////////////////////
+
+#ifdef CRC_GENERIC
+
#include "crc_macros.h"
@@ -26,8 +88,8 @@
// See the comments in crc32_fast.c. They aren't duplicated here.
-extern LZMA_API(uint64_t)
-lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
+static uint64_t
+crc64_generic(const uint8_t *buf, size_t size, uint64_t crc)
{
crc = ~crc;
@@ -46,10 +108,11 @@ lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
while (buf < limit) {
#ifdef WORDS_BIGENDIAN
- const uint32_t tmp = (crc >> 32)
+ const uint32_t tmp = (uint32_t)(crc >> 32)
^ aligned_read32ne(buf);
#else
- const uint32_t tmp = crc ^ aligned_read32ne(buf);
+ const uint32_t tmp = (uint32_t)crc
+ ^ aligned_read32ne(buf);
#endif
buf += 4;
@@ -70,3 +133,380 @@ lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
return ~crc;
}
+#endif
+
+
+/////////////////////
+// x86 CLMUL CRC64 //
+/////////////////////
+
+#ifdef CRC_CLMUL
+
+#include <immintrin.h>
+
+
+/*
+// These functions were used to generate the constants
+// at the top of crc64_clmul().
+static uint64_t
+calc_lo(uint64_t poly)
+{
+ uint64_t a = poly;
+ uint64_t b = 0;
+
+ for (unsigned i = 0; i < 64; ++i) {
+ b = (b >> 1) | (a << 63);
+ a = (a >> 1) ^ (a & 1 ? poly : 0);
+ }
+
+ return b;
+}
+
+static uint64_t
+calc_hi(uint64_t poly, uint64_t a)
+{
+ for (unsigned i = 0; i < 64; ++i)
+ a = (a >> 1) ^ (a & 1 ? poly : 0);
+
+ return a;
+}
+*/
+
+
+#define MASK_L(in, mask, r) \
+ r = _mm_shuffle_epi8(in, mask)
+
+#define MASK_H(in, mask, r) \
+ r = _mm_shuffle_epi8(in, _mm_xor_si128(mask, vsign))
+
+#define MASK_LH(in, mask, low, high) \
+ MASK_L(in, mask, low); \
+ MASK_H(in, mask, high)
+
+
+// EDG-based compilers (Intel's classic compiler and compiler for E2K) can
+// define __GNUC__ but the attribute must not be used with them.
+// The new Clang-based ICX needs the attribute.
+//
+// NOTE: Build systems check for this too, keep them in sync with this.
+#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__)
+__attribute__((__target__("ssse3,sse4.1,pclmul")))
+#endif
+static uint64_t
+crc64_clmul(const uint8_t *buf, size_t size, uint64_t crc)
+{
+ // The prototypes of the intrinsics use signed types while most of
+ // the values are treated as unsigned here. These warnings in this
+ // function have been checked and found to be harmless so silence them.
+#if TUKLIB_GNUC_REQ(4, 6) || defined(__clang__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wsign-conversion"
+# pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS
+ // The code assumes that there is at least one byte of input.
+ if (size == 0)
+ return crc;
+#endif
+
+ // const uint64_t poly = 0xc96c5795d7870f42; // CRC polynomial
+ const uint64_t p = 0x92d8af2baf0e1e85; // (poly << 1) | 1
+ const uint64_t mu = 0x9c3e466c172963d5; // (calc_lo(poly) << 1) | 1
+ const uint64_t k2 = 0xdabe95afc7875f40; // calc_hi(poly, 1)
+ const uint64_t k1 = 0xe05dd497ca393ae4; // calc_hi(poly, k2)
+ const __m128i vfold0 = _mm_set_epi64x(p, mu);
+ const __m128i vfold1 = _mm_set_epi64x(k2, k1);
+
+ // Create a vector with 8-bit values 0 to 15. This is used to
+ // construct control masks for _mm_blendv_epi8 and _mm_shuffle_epi8.
+ const __m128i vramp = _mm_setr_epi32(
+ 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c);
+
+ // This is used to invert the control mask of _mm_shuffle_epi8
+ // so that bytes that wouldn't be picked with the original mask
+ // will be picked and vice versa.
+ const __m128i vsign = _mm_set1_epi8(0x80);
+
+ // Memory addresses A to D and the distances between them:
+ //
+ // A B C D
+ // [skip_start][size][skip_end]
+ // [ size2 ]
+ //
+ // A and D are 16-byte aligned. B and C are 1-byte aligned.
+ // skip_start and skip_end are 0-15 bytes. size is at least 1 byte.
+ //
+ // A = aligned_buf will initially point to this address.
+ // B = The address pointed by the caller-supplied buf.
+ // C = buf + size == aligned_buf + size2
+ // D = buf + size + skip_end == aligned_buf + size2 + skip_end
+ const size_t skip_start = (size_t)((uintptr_t)buf & 15);
+ const size_t skip_end = (size_t)(-(uintptr_t)(buf + size) & 15);
+ const __m128i *aligned_buf = (const __m128i *)(
+ (uintptr_t)buf & ~(uintptr_t)15);
+
+ // If size2 <= 16 then the whole input fits into a single 16-byte
+ // vector. If size2 > 16 then at least two 16-byte vectors must
+ // be processed. If size2 > 16 && size <= 16 then there is only
+ // one 16-byte vector's worth of input but it is unaligned in memory.
+ //
+ // NOTE: There is no integer overflow here if the arguments are valid.
+ // If this overflowed, buf + size would too.
+ size_t size2 = skip_start + size;
+
+ // Masks to be used with _mm_blendv_epi8 and _mm_shuffle_epi8:
+ // The first skip_start or skip_end bytes in the vectors will have
+ // the high bit (0x80) set. _mm_blendv_epi8 and _mm_shuffle_epi8
+ // will produce zeros for these positions. (Bitwise-xor of these
+ // masks with vsign will produce the opposite behavior.)
+ const __m128i mask_start
+ = _mm_sub_epi8(vramp, _mm_set1_epi8(skip_start));
+ const __m128i mask_end = _mm_sub_epi8(vramp, _mm_set1_epi8(skip_end));
+
+ // Get the first 1-16 bytes into data0. If loading less than 16 bytes,
+ // the bytes are loaded to the high bits of the vector and the least
+ // significant positions are filled with zeros.
+ const __m128i data0 = _mm_blendv_epi8(_mm_load_si128(aligned_buf),
+ _mm_setzero_si128(), mask_start);
+ ++aligned_buf;
+
+#if defined(__i386__) || defined(_M_IX86)
+ const __m128i initial_crc = _mm_set_epi64x(0, ~crc);
+#else
+ // GCC and Clang would produce good code with _mm_set_epi64x
+ // but MSVC needs _mm_cvtsi64_si128 on x86-64.
+ const __m128i initial_crc = _mm_cvtsi64_si128(~crc);
+#endif
+
+ __m128i v0, v1, v2, v3;
+
+#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS
+ if (size <= 16) {
+ // Right-shift initial_crc by 1-16 bytes based on "size"
+ // and store the result in v1 (high bytes) and v0 (low bytes).
+ //
+ // NOTE: The highest 8 bytes of initial_crc are zeros so
+ // v1 will be filled with zeros if size >= 8. The highest 8
+ // bytes of v1 will always become zeros.
+ //
+ // [ v1 ][ v0 ]
+ // [ initial_crc ] size == 1
+ // [ initial_crc ] size == 2
+ // [ initial_crc ] size == 15
+ // [ initial_crc ] size == 16 (all in v0)
+ const __m128i mask_low = _mm_add_epi8(
+ vramp, _mm_set1_epi8(size - 16));
+ MASK_LH(initial_crc, mask_low, v0, v1);
+
+ if (size2 <= 16) {
+ // There are 1-16 bytes of input and it is all
+ // in data0. Copy the input bytes to v3. If there
+ // are fewer than 16 bytes, the low bytes in v3
+ // will be filled with zeros. That is, the input
+ // bytes are stored to the same position as
+ // (part of) initial_crc is in v0.
+ MASK_L(data0, mask_end, v3);
+ } else {
+ // There are 2-16 bytes of input but not all bytes
+ // are in data0.
+ const __m128i data1 = _mm_load_si128(aligned_buf);
+
+ // Collect the 2-16 input bytes from data0 and data1
+ // to v2 and v3, and bitwise-xor them with the
+ // low bits of initial_crc in v0. Note that the
+ // the second xor is below this else-block as it
+ // is shared with the other branch.
+ MASK_H(data0, mask_end, v2);
+ MASK_L(data1, mask_end, v3);
+ v0 = _mm_xor_si128(v0, v2);
+ }
+
+ v0 = _mm_xor_si128(v0, v3);
+ v1 = _mm_alignr_epi8(v1, v0, 8);
+ } else
+#endif
+ {
+ const __m128i data1 = _mm_load_si128(aligned_buf);
+ MASK_LH(initial_crc, mask_start, v0, v1);
+ v0 = _mm_xor_si128(v0, data0);
+ v1 = _mm_xor_si128(v1, data1);
+
+#define FOLD \
+ v1 = _mm_xor_si128(v1, _mm_clmulepi64_si128(v0, vfold1, 0x00)); \
+ v0 = _mm_xor_si128(v1, _mm_clmulepi64_si128(v0, vfold1, 0x11));
+
+ while (size2 > 32) {
+ ++aligned_buf;
+ size2 -= 16;
+ FOLD
+ v1 = _mm_load_si128(aligned_buf);
+ }
+
+ if (size2 < 32) {
+ MASK_H(v0, mask_end, v2);
+ MASK_L(v0, mask_end, v0);
+ MASK_L(v1, mask_end, v3);
+ v1 = _mm_or_si128(v2, v3);
+ }
+
+ FOLD
+ v1 = _mm_srli_si128(v0, 8);
+#undef FOLD
+ }
+
+ v1 = _mm_xor_si128(_mm_clmulepi64_si128(v0, vfold1, 0x10), v1);
+ v0 = _mm_clmulepi64_si128(v1, vfold0, 0x00);
+ v2 = _mm_clmulepi64_si128(v0, vfold0, 0x10);
+ v0 = _mm_xor_si128(_mm_xor_si128(v2, _mm_slli_si128(v0, 8)), v1);
+
+#if defined(__i386__) || defined(_M_IX86)
+ return ~(((uint64_t)(uint32_t)_mm_extract_epi32(v0, 3) << 32) |
+ (uint64_t)(uint32_t)_mm_extract_epi32(v0, 2));
+#else
+ return ~(uint64_t)_mm_extract_epi64(v0, 1);
+#endif
+
+#if TUKLIB_GNUC_REQ(4, 6) || defined(__clang__)
+# pragma GCC diagnostic pop
+#endif
+}
+#endif
+
+
+////////////////////////
+// Detect CPU support //
+////////////////////////
+
+#if defined(CRC_GENERIC) && defined(CRC_CLMUL)
+static inline bool
+is_clmul_supported(void)
+{
+ int success = 1;
+ uint32_t r[4]; // eax, ebx, ecx, edx
+
+#if defined(_MSC_VER)
+ // This needs <intrin.h> with MSVC. ICC has it as a built-in
+ // on all platforms.
+ __cpuid(r, 1);
+#elif defined(HAVE_CPUID_H)
+ // Compared to just using __asm__ to run CPUID, this also checks
+ // that CPUID is supported and saves and restores ebx as that is
+ // needed with GCC < 5 with position-independent code (PIC).
+ success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]);
+#else
+ // Just a fallback that shouldn't be needed.
+ __asm__("cpuid\n\t"
+ : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3])
+ : "a"(1), "c"(0));
+#endif
+
+ // Returns true if these are supported:
+ // CLMUL (bit 1 in ecx)
+ // SSSE3 (bit 9 in ecx)
+ // SSE4.1 (bit 19 in ecx)
+ const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19);
+ return success && (r[2] & ecx_mask) == ecx_mask;
+
+ // Alternative methods that weren't used:
+ // - ICC's _may_i_use_cpu_feature: the other methods should work too.
+ // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul")
+ //
+ // CPUID decoding is needed with MSVC anyway and with older GCC. This keeps
+ // the feature checks in the build system simpler too. The nice thing
+ // about __builtin_cpu_supports would be that it generates very short
+ // code as it only reads a variable set at startup, but a few bytes
+ // doesn't matter here.
+}
+
+
+#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
+# define CRC64_FUNC_INIT
+# define CRC64_SET_FUNC_ATTR __attribute__((__constructor__))
+#else
+# define CRC64_FUNC_INIT = &crc64_dispatch
+# define CRC64_SET_FUNC_ATTR
+static uint64_t crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc);
+#endif
+
+
+// Pointer to the selected CRC64 method.
+static uint64_t (*crc64_func)(const uint8_t *buf, size_t size, uint64_t crc)
+ CRC64_FUNC_INIT;
+
+
+CRC64_SET_FUNC_ATTR
+static void
+crc64_set_func(void)
+{
+ crc64_func = is_clmul_supported() ? &crc64_clmul : &crc64_generic;
+ return;
+}
+
+
+#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
+static uint64_t
+crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc)
+{
+ // When __attribute__((__constructor__)) isn't supported, set the
+ // function pointer without any locking. If multiple threads run
+ // the detection code in parallel, they will all end up setting
+ // the pointer to the same value. This avoids the use of
+ // mythread_once() on every call to lzma_crc64() but this likely
+ // isn't strictly standards compliant. Let's change it if it breaks.
+ crc64_set_func();
+ return crc64_func(buf, size, crc);
+}
+#endif
+#endif
+
+
+extern LZMA_API(uint64_t)
+lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
+{
+#if defined(CRC_GENERIC) && defined(CRC_CLMUL)
+ // If CLMUL is available, it is the best for non-tiny inputs,
+ // being over twice as fast as the generic slice-by-four version.
+ // However, for size <= 16 it's different. In the extreme case
+ // of size == 1 the generic version can be five times faster.
+ // At size >= 8 the CLMUL starts to become reasonable. It
+ // varies depending on the alignment of buf too.
+ //
+ // The above doesn't include the overhead of mythread_once().
+ // At least on x86-64 GNU/Linux, pthread_once() is very fast but
+ // it still makes lzma_crc64(buf, 1, crc) 50-100 % slower. When
+ // size reaches 12-16 bytes the overhead becomes negligible.
+ //
+ // So using the generic version for size <= 16 may give better
+ // performance with tiny inputs but if such inputs happen rarely
+ // it's not so obvious because then the lookup table of the
+ // generic version may not be in the processor cache.
+#ifdef CRC_USE_GENERIC_FOR_SMALL_INPUTS
+ if (size <= 16)
+ return crc64_generic(buf, size, crc);
+#endif
+
+/*
+#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
+ // See crc64_dispatch(). This would be the alternative which uses
+ // locking and doesn't use crc64_dispatch(). Note that on Windows
+ // this method needs Vista threads.
+ mythread_once(crc64_set_func);
+#endif
+*/
+
+ return crc64_func(buf, size, crc);
+
+#elif defined(CRC_CLMUL)
+ // If CLMUL is used unconditionally without runtime CPU detection
+ // then omitting the generic version and its 8 KiB lookup table
+ // makes the library smaller.
+ //
+ // FIXME: Lookup table isn't currently omitted on 32-bit x86,
+ // see crc64_table.c.
+ return crc64_clmul(buf, size, crc);
+
+#else
+ return crc64_generic(buf, size, crc);
+#endif
+}
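The CPUID check in is_clmul_supported() can be reproduced standalone. The sketch below shows only the GCC/Clang <cpuid.h> path and uses the same ECX bit positions as the comments above; the MSVC __cpuid() variant and the helper name are assumptions and it is not part of the patch.

    #include <cpuid.h>
    #include <stdbool.h>

    /* Returns true when CLMUL (ECX bit 1), SSSE3 (bit 9), and
     * SSE4.1 (bit 19) are all reported by CPUID leaf 1. */
    static bool
    has_clmul_ssse3_sse41(void)
    {
        unsigned int eax, ebx, ecx, edx;
        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return false;

        const unsigned int mask = (1u << 1) | (1u << 9) | (1u << 19);
        return (ecx & mask) == mask;
    }
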
diff --git a/contrib/libs/lzma/liblzma/check/crc64_table.c b/contrib/libs/lzma/liblzma/check/crc64_table.c
index fa334df321..307846ab14 100644
--- a/contrib/libs/lzma/liblzma/check/crc64_table.c
+++ b/contrib/libs/lzma/liblzma/check/crc64_table.c
@@ -12,11 +12,24 @@
#include "common.h"
+
+// FIXME: Compared to crc64_fast.c this has to check for __x86_64__ too
+// so that in 32-bit builds crc64_x86.S won't break due to a missing table.
+#if (defined(__x86_64__) && defined(__SSSE3__) \
+ && defined(__SSE4_1__) && defined(__PCLMUL__)) \
+ || (defined(__e2k__) && __iset__ >= 6)
+// No table needed but something has to be exported to keep some toolchains
+// happy. Also use a declaration to silence compiler warnings.
+extern const char lzma_crc64_dummy;
+const char lzma_crc64_dummy;
+
+#else
// Having the declaration here silences clang -Wmissing-variable-declarations.
extern const uint64_t lzma_crc64_table[4][256];
-#ifdef WORDS_BIGENDIAN
-# error #include "crc64_table_be.h"
-#else
-# include "crc64_table_le.h"
+# if defined(WORDS_BIGENDIAN)
+# error #include "crc64_table_be.h"
+# else
+# include "crc64_table_le.h"
+# endif
#endif
diff --git a/contrib/libs/lzma/liblzma/common/alone_decoder.c b/contrib/libs/lzma/liblzma/common/alone_decoder.c
index 239b230ef1..1dc85badf9 100644
--- a/contrib/libs/lzma/liblzma/common/alone_decoder.c
+++ b/contrib/libs/lzma/liblzma/common/alone_decoder.c
@@ -110,12 +110,24 @@ alone_decode(void *coder_ptr, const lzma_allocator *allocator,
// Another hack to ditch false positives: Assume that
// if the uncompressed size is known, it must be less
// than 256 GiB.
+ //
+ // FIXME? Without picky we allow > LZMA_VLI_MAX which doesn't
+ // really matter in this specific situation (> LZMA_VLI_MAX is
+ // safe in the LZMA decoder) but it's somewhat weird still.
if (coder->picky
&& coder->uncompressed_size != LZMA_VLI_UNKNOWN
&& coder->uncompressed_size
>= (LZMA_VLI_C(1) << 38))
return LZMA_FORMAT_ERROR;
+ // Use LZMA_FILTER_LZMA1EXT features to specify the
+ // uncompressed size and that the end marker is allowed
+ // even when the uncompressed size is known. Both .lzma
+ // header and LZMA1EXT use UINT64_MAX to indicate that the size
+ // is unknown.
+ coder->options.ext_flags = LZMA_LZMA1EXT_ALLOW_EOPM;
+ lzma_set_ext_size(coder->options, coder->uncompressed_size);
+
// Calculate the memory usage so that it is ready
// for SEQ_CODER_INIT.
coder->memusage = lzma_lzma_decoder_memusage(&coder->options)
@@ -132,6 +144,7 @@ alone_decode(void *coder_ptr, const lzma_allocator *allocator,
lzma_filter_info filters[2] = {
{
+ .id = LZMA_FILTER_LZMA1EXT,
.init = &lzma_lzma_decoder_init,
.options = &coder->options,
}, {
@@ -139,14 +152,8 @@ alone_decode(void *coder_ptr, const lzma_allocator *allocator,
}
};
- const lzma_ret ret = lzma_next_filter_init(&coder->next,
- allocator, filters);
- if (ret != LZMA_OK)
- return ret;
-
- // Use a hack to set the uncompressed size.
- lzma_lz_decoder_uncompressed(coder->next.coder,
- coder->uncompressed_size);
+ return_if_error(lzma_next_filter_init(&coder->next,
+ allocator, filters));
coder->sequence = SEQ_CODE;
break;
diff --git a/contrib/libs/lzma/liblzma/common/alone_encoder.c b/contrib/libs/lzma/liblzma/common/alone_encoder.c
index 96c1db70cc..c9392f3769 100644
--- a/contrib/libs/lzma/liblzma/common/alone_encoder.c
+++ b/contrib/libs/lzma/liblzma/common/alone_encoder.c
@@ -129,6 +129,7 @@ alone_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
// Initialize the LZMA encoder.
const lzma_filter_info filters[2] = {
{
+ .id = LZMA_FILTER_LZMA1,
.init = &lzma_lzma_encoder_init,
.options = (void *)(options),
}, {
diff --git a/contrib/libs/lzma/liblzma/common/auto_decoder.c b/contrib/libs/lzma/liblzma/common/auto_decoder.c
index 6895c7ccf7..2a5c0894d1 100644
--- a/contrib/libs/lzma/liblzma/common/auto_decoder.c
+++ b/contrib/libs/lzma/liblzma/common/auto_decoder.c
@@ -1,7 +1,7 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file auto_decoder.c
-/// \brief Autodetect between .xz Stream and .lzma (LZMA_Alone) formats
+/// \brief Autodetect between .xz, .lzma (LZMA_Alone), and .lz (lzip)
//
// Author: Lasse Collin
//
@@ -12,10 +12,13 @@
#include "stream_decoder.h"
#include "alone_decoder.h"
+#ifdef HAVE_LZIP_DECODER
+# include "lzip_decoder.h"
+#endif
typedef struct {
- /// Stream decoder or LZMA_Alone decoder
+ /// .xz Stream decoder, LZMA_Alone decoder, or lzip decoder
lzma_next_coder next;
uint64_t memlimit;
@@ -46,14 +49,22 @@ auto_decode(void *coder_ptr, const lzma_allocator *allocator,
// SEQ_CODE even if we return some LZMA_*_CHECK.
coder->sequence = SEQ_CODE;
- // Detect the file format. For now this is simple, since if
- // it doesn't start with 0xFD (the first magic byte of the
- // new format), it has to be LZMA_Alone, or something that
- // we don't support at all.
+ // Detect the file format. .xz files start with 0xFD which
+ // cannot be the first byte of .lzma (LZMA_Alone) format.
+ // The .lz format starts with 0x4C which could be the
+ // first byte of a .lzma file but luckily it would mean
+ // lc/lp/pb being 4/3/1 which liblzma doesn't support because
+ // lc + lp > 4. So using just 0x4C to detect .lz is OK here.
if (in[*in_pos] == 0xFD) {
return_if_error(lzma_stream_decoder_init(
&coder->next, allocator,
coder->memlimit, coder->flags));
+#ifdef HAVE_LZIP_DECODER
+ } else if (in[*in_pos] == 0x4C) {
+ return_if_error(lzma_lzip_decoder_init(
+ &coder->next, allocator,
+ coder->memlimit, coder->flags));
+#endif
} else {
return_if_error(lzma_alone_decoder_init(&coder->next,
allocator, coder->memlimit, true));
@@ -86,8 +97,8 @@ auto_decode(void *coder_ptr, const lzma_allocator *allocator,
// Fall through
case SEQ_FINISH:
- // When LZMA_DECODE_CONCATENATED was used and we were decoding
- // LZMA_Alone file, we need to check check that there is no
+ // When LZMA_CONCATENATED was used and we were decoding
+ // a LZMA_Alone file, we need to check that there is no
// trailing garbage and wait for LZMA_FINISH.
if (*in_pos < in_size)
return LZMA_DATA_ERROR;
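Applications reach the format autodetection above through lzma_auto_decoder(). A minimal usage sketch follows; the choice of an unlimited memory limit and the LZMA_CONCATENATED flag are assumptions for illustration, not part of this commit.

    #include <lzma.h>

    /* Sketch only: set up a decoder that accepts .xz, .lzma and, when
     * liblzma was built with HAVE_LZIP_DECODER, .lz input. */
    static lzma_ret
    init_any_format_decoder(lzma_stream *strm)
    {
        /* UINT64_MAX = no memory usage limit. LZMA_CONCATENATED decodes
         * all concatenated .xz Streams and, as the hunk above notes,
         * requires that a .lzma file has no trailing garbage. */
        return lzma_auto_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
    }
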
diff --git a/contrib/libs/lzma/liblzma/common/block_buffer_encoder.c b/contrib/libs/lzma/liblzma/common/block_buffer_encoder.c
index 39e263aa47..a47342efd0 100644
--- a/contrib/libs/lzma/liblzma/common/block_buffer_encoder.c
+++ b/contrib/libs/lzma/liblzma/common/block_buffer_encoder.c
@@ -325,6 +325,24 @@ lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
}
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// This is for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+LZMA_SYMVER_API("lzma_block_uncomp_encode@XZ_5.2.2",
+ lzma_ret, lzma_block_uncomp_encode_522)(lzma_block *block,
+ const uint8_t *in, size_t in_size,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+ lzma_nothrow lzma_attr_warn_unused_result
+ __attribute__((__alias__("lzma_block_uncomp_encode_52")));
+
+LZMA_SYMVER_API("lzma_block_uncomp_encode@@XZ_5.2",
+ lzma_ret, lzma_block_uncomp_encode_52)(lzma_block *block,
+ const uint8_t *in, size_t in_size,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+ lzma_nothrow lzma_attr_warn_unused_result;
+
+#define lzma_block_uncomp_encode lzma_block_uncomp_encode_52
+#endif
extern LZMA_API(lzma_ret)
lzma_block_uncomp_encode(lzma_block *block,
const uint8_t *in, size_t in_size,
diff --git a/contrib/libs/lzma/liblzma/common/block_decoder.c b/contrib/libs/lzma/liblzma/common/block_decoder.c
index 075bd279ff..4827e0f046 100644
--- a/contrib/libs/lzma/liblzma/common/block_decoder.c
+++ b/contrib/libs/lzma/liblzma/common/block_decoder.c
@@ -40,6 +40,9 @@ typedef struct {
/// is unknown.
lzma_vli compressed_limit;
+ /// Maximum allowed Uncompressed Size.
+ lzma_vli uncompressed_limit;
+
/// Position when reading the Check field
size_t check_pos;
@@ -52,21 +55,6 @@ typedef struct {
static inline bool
-update_size(lzma_vli *size, lzma_vli add, lzma_vli limit)
-{
- if (limit > LZMA_VLI_MAX)
- limit = LZMA_VLI_MAX;
-
- if (limit < *size || limit - *size < add)
- return true;
-
- *size += add;
-
- return false;
-}
-
-
-static inline bool
is_size_valid(lzma_vli size, lzma_vli reference)
{
return reference == LZMA_VLI_UNKNOWN || reference == size;
@@ -86,21 +74,54 @@ block_decode(void *coder_ptr, const lzma_allocator *allocator,
const size_t in_start = *in_pos;
const size_t out_start = *out_pos;
+ // Limit the amount of input and output space that we give
+ // to the raw decoder based on the information we have
+ // (or don't have) from Block Header.
+ const size_t in_stop = *in_pos + (size_t)my_min(
+ in_size - *in_pos,
+ coder->compressed_limit - coder->compressed_size);
+ const size_t out_stop = *out_pos + (size_t)my_min(
+ out_size - *out_pos,
+ coder->uncompressed_limit - coder->uncompressed_size);
+
const lzma_ret ret = coder->next.code(coder->next.coder,
- allocator, in, in_pos, in_size,
- out, out_pos, out_size, action);
+ allocator, in, in_pos, in_stop,
+ out, out_pos, out_stop, action);
const size_t in_used = *in_pos - in_start;
const size_t out_used = *out_pos - out_start;
- // NOTE: We compare to compressed_limit here, which prevents
- // the total size of the Block growing past LZMA_VLI_MAX.
- if (update_size(&coder->compressed_size, in_used,
- coder->compressed_limit)
- || update_size(&coder->uncompressed_size,
- out_used,
- coder->block->uncompressed_size))
- return LZMA_DATA_ERROR;
+ // Because we have limited the input and output sizes,
+ // we know that these cannot grow too big or overflow.
+ coder->compressed_size += in_used;
+ coder->uncompressed_size += out_used;
+
+ if (ret == LZMA_OK) {
+ const bool comp_done = coder->compressed_size
+ == coder->block->compressed_size;
+ const bool uncomp_done = coder->uncompressed_size
+ == coder->block->uncompressed_size;
+
+ // If both input and output amounts match the sizes
+ // in Block Header but we still got LZMA_OK instead
+ // of LZMA_STREAM_END, the file is broken.
+ if (comp_done && uncomp_done)
+ return LZMA_DATA_ERROR;
+
+ // If the decoder has consumed all the input that it
+ // needs but it still couldn't fill the output buffer
+ // or return LZMA_STREAM_END, the file is broken.
+ if (comp_done && *out_pos < out_size)
+ return LZMA_DATA_ERROR;
+
+ // If the decoder has produced all the output but
+ // it still didn't return LZMA_STREAM_END or consume
+ // more input (for example, detecting an end of
+ // payload marker may need more input but produce
+ // no output) the file is broken.
+ if (uncomp_done && *in_pos < in_size)
+ return LZMA_DATA_ERROR;
+ }
if (!coder->ignore_check)
lzma_check_update(&coder->check, coder->block->check,
@@ -230,6 +251,14 @@ lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
- lzma_check_size(block->check)
: block->compressed_size;
+ // With Uncompressed Size this is simpler. If Block Header lacks
+ // the size info, then LZMA_VLI_MAX is the maximum possible
+ // Uncompressed Size.
+ coder->uncompressed_limit
+ = block->uncompressed_size == LZMA_VLI_UNKNOWN
+ ? LZMA_VLI_MAX
+ : block->uncompressed_size;
+
// Initialize the check. It's caller's problem if the Check ID is not
// supported, and the Block decoder cannot verify the Check field.
// Caller can test lzma_check_is_supported(block->check).
diff --git a/contrib/libs/lzma/liblzma/common/block_encoder.c b/contrib/libs/lzma/liblzma/common/block_encoder.c
index 168846ad68..520ecc5a49 100644
--- a/contrib/libs/lzma/liblzma/common/block_encoder.c
+++ b/contrib/libs/lzma/liblzma/common/block_encoder.c
@@ -217,6 +217,7 @@ lzma_block_encoder(lzma_stream *strm, lzma_block *block)
lzma_next_strm_init(lzma_block_encoder_init, strm, block);
strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
strm->internal->supported_actions[LZMA_FINISH] = true;
return LZMA_OK;
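With LZMA_SYNC_FLUSH now advertised by the Block encoder, a caller can flush mid-Block the same way as with the Stream encoder. Below is a hedged sketch of such a flush loop; the helper name and the way flushed output is written out are assumptions left to the caller.

    #include <lzma.h>

    /* Sketch only: flush everything buffered so far. lzma_code() keeps
     * returning LZMA_OK until the flush is complete, then returns
     * LZMA_STREAM_END; after that, normal LZMA_RUN calls may continue. */
    static lzma_ret
    flush_encoder(lzma_stream *strm, uint8_t *out, size_t out_size)
    {
        lzma_ret ret;
        do {
            strm->next_out = out;
            strm->avail_out = out_size;
            ret = lzma_code(strm, LZMA_SYNC_FLUSH);
            /* ... write out_size - strm->avail_out bytes from out ... */
        } while (ret == LZMA_OK);

        return ret == LZMA_STREAM_END ? LZMA_OK : ret;
    }
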
diff --git a/contrib/libs/lzma/liblzma/common/block_header_decoder.c b/contrib/libs/lzma/liblzma/common/block_header_decoder.c
index 2e1135dd63..c4935dcf46 100644
--- a/contrib/libs/lzma/liblzma/common/block_header_decoder.c
+++ b/contrib/libs/lzma/liblzma/common/block_header_decoder.c
@@ -14,22 +14,6 @@
#include "check.h"
-static void
-free_properties(lzma_block *block, const lzma_allocator *allocator)
-{
- // Free allocated filter options. The last array member is not
- // touched after the initialization in the beginning of
- // lzma_block_header_decode(), so we don't need to touch that here.
- for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) {
- lzma_free(block->filters[i].options, allocator);
- block->filters[i].id = LZMA_VLI_UNKNOWN;
- block->filters[i].options = NULL;
- }
-
- return;
-}
-
-
extern LZMA_API(lzma_ret)
lzma_block_header_decode(lzma_block *block,
const lzma_allocator *allocator, const uint8_t *in)
@@ -39,6 +23,10 @@ lzma_block_header_decode(lzma_block *block,
// are invalid or over 63 bits, or if the header is too small
// to contain the claimed information.
+ // Catch unexpected NULL pointers.
+ if (block == NULL || block->filters == NULL || in == NULL)
+ return LZMA_PROG_ERROR;
+
// Initialize the filter options array. This way the caller can
// safely free() the options even if an error occurs in this function.
for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) {
@@ -67,8 +55,11 @@ lzma_block_header_decode(lzma_block *block,
const size_t in_size = block->header_size - 4;
// Verify CRC32
- if (lzma_crc32(in, in_size, 0) != read32le(in + in_size))
+ if (lzma_crc32(in, in_size, 0) != read32le(in + in_size)) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
return LZMA_DATA_ERROR;
+#endif
+ }
// Check for unsupported flags.
if (in[1] & 0x3C)
@@ -104,7 +95,7 @@ lzma_block_header_decode(lzma_block *block,
&block->filters[i], allocator,
in, &in_pos, in_size);
if (ret != LZMA_OK) {
- free_properties(block, allocator);
+ lzma_filters_free(block->filters, allocator);
return ret;
}
}
@@ -112,7 +103,7 @@ lzma_block_header_decode(lzma_block *block,
// Padding
while (in_pos < in_size) {
if (in[in_pos++] != 0x00) {
- free_properties(block, allocator);
+ lzma_filters_free(block->filters, allocator);
// Possibly some new field present so use
// LZMA_OPTIONS_ERROR instead of LZMA_DATA_ERROR.
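Replacing the local free_properties() with the public lzma_filters_free() (added in 5.4.0) also works for callers of lzma_block_header_decode(). A hedged sketch follows; the header buffer and the check value are assumed to come from elsewhere in the caller.

    #include <lzma.h>
    #include <string.h>

    /* Sketch only: buf points to a complete Block Header whose size is
     * derived from its first byte; check comes from the Stream Header. */
    static lzma_ret
    decode_block_header(const uint8_t *buf, lzma_check check)
    {
        lzma_filter filters[LZMA_FILTERS_MAX + 1];
        lzma_block block;
        memset(&block, 0, sizeof(block));

        block.version = 1;
        block.check = check;
        block.filters = filters;
        block.header_size = lzma_block_header_size_decode(buf[0]);

        const lzma_ret ret = lzma_block_header_decode(&block, NULL, buf);

        /* ... use block.filters on success ... */

        /* Safe even on error: the decoder initializes the array first. */
        lzma_filters_free(filters, NULL);

        return ret;
    }
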
diff --git a/contrib/libs/lzma/liblzma/common/common.c b/contrib/libs/lzma/liblzma/common/common.c
index cf714e5e43..a708fdf187 100644
--- a/contrib/libs/lzma/liblzma/common/common.c
+++ b/contrib/libs/lzma/liblzma/common/common.c
@@ -211,7 +211,6 @@ lzma_code(lzma_stream *strm, lzma_action action)
|| strm->reserved_ptr2 != NULL
|| strm->reserved_ptr3 != NULL
|| strm->reserved_ptr4 != NULL
- || strm->reserved_int1 != 0
|| strm->reserved_int2 != 0
|| strm->reserved_int3 != 0
|| strm->reserved_int4 != 0
@@ -299,9 +298,7 @@ lzma_code(lzma_stream *strm, lzma_action action)
strm->internal->avail_in = strm->avail_in;
- // Cast is needed to silence a warning about LZMA_TIMED_OUT, which
- // isn't part of lzma_ret enumeration.
- switch ((unsigned int)(ret)) {
+ switch (ret) {
case LZMA_OK:
// Don't return LZMA_BUF_ERROR when it happens the first time.
// This is to avoid returning LZMA_BUF_ERROR when avail_out
@@ -322,6 +319,17 @@ lzma_code(lzma_stream *strm, lzma_action action)
ret = LZMA_OK;
break;
+ case LZMA_SEEK_NEEDED:
+ strm->internal->allow_buf_error = false;
+
+ // If LZMA_FINISH was used, reset it back to the
+ // LZMA_RUN-based state so that new input can be supplied
+ // by the application.
+ if (strm->internal->sequence == ISEQ_FINISH)
+ strm->internal->sequence = ISEQ_RUN;
+
+ break;
+
case LZMA_STREAM_END:
if (strm->internal->sequence == ISEQ_SYNC_FLUSH
|| strm->internal->sequence == ISEQ_FULL_FLUSH
@@ -366,6 +374,20 @@ lzma_end(lzma_stream *strm)
}
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// This is for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+LZMA_SYMVER_API("lzma_get_progress@XZ_5.2.2",
+ void, lzma_get_progress_522)(lzma_stream *strm,
+ uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow
+ __attribute__((__alias__("lzma_get_progress_52")));
+
+LZMA_SYMVER_API("lzma_get_progress@@XZ_5.2",
+ void, lzma_get_progress_52)(lzma_stream *strm,
+ uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow;
+
+#define lzma_get_progress lzma_get_progress_52
+#endif
extern LZMA_API(void)
lzma_get_progress(lzma_stream *strm,
uint64_t *progress_in, uint64_t *progress_out)
diff --git a/contrib/libs/lzma/liblzma/common/common.h b/contrib/libs/lzma/liblzma/common/common.h
index b3d3b7a059..11fec52c59 100644
--- a/contrib/libs/lzma/liblzma/common/common.h
+++ b/contrib/libs/lzma/liblzma/common/common.h
@@ -34,6 +34,59 @@
#include "lzma.h"
+// This is for detecting modern GCC and Clang attributes
+// like __symver__ in GCC >= 10.
+#ifdef __has_attribute
+# define lzma_has_attribute(attr) __has_attribute(attr)
+#else
+# define lzma_has_attribute(attr) 0
+#endif
+
+// The extra symbol versioning in the C files may only be used when
+// building a shared library. If HAVE_SYMBOL_VERSIONS_LINUX is defined
+// to 2 then symbol versioning is done only if also PIC is defined.
+// By default Libtool defines PIC when building a shared library and
+// doesn't define it when building a static library but it can be
+// overridden with --with-pic and --without-pic. In configure, let's rely
+// on PIC if neither --with-pic nor --without-pic was used.
+#if defined(HAVE_SYMBOL_VERSIONS_LINUX) \
+ && (HAVE_SYMBOL_VERSIONS_LINUX == 2 && !defined(PIC))
+# undef HAVE_SYMBOL_VERSIONS_LINUX
+#endif
+
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// To keep link-time optimization (LTO, -flto) working with GCC,
+// the __symver__ attribute must be used instead of __asm__(".symver ...").
+// Otherwise the symbol versions may be lost, resulting in broken liblzma
+// that has wrong default versions in the exported symbol list!
+// The attribute was added in GCC 10; LTO with older GCC is not supported.
+//
+// To keep -Wmissing-prototypes happy, use LZMA_SYMVER_API only with function
+// declarations (including those with __alias__ attribute) and LZMA_API with
+// the function definitions. This means a little bit of silly copy-and-paste
+// between declarations and definitions though.
+//
+// As of GCC 12.2, the __symver__ attribute supports only @ and @@ but the
+// very convenient @@@ isn't supported (it's supported by GNU assembler
+// since 2000). When using @@ instead of @@@, the internal name must not be
+// the same as the external name to avoid problems in some situations. This
+// is why "#define foo_52 foo" is needed for the default symbol versions.
+//
+// __has_attribute is supported before GCC 10 and it is supported in Clang 14
+// too (which doesn't support __symver__) so use it to detect if __symver__
+// is available. This should be far more reliable than looking at compiler
+// version macros as nowadays especially __GNUC__ is defined by many compilers.
+# if lzma_has_attribute(__symver__)
+# define LZMA_SYMVER_API(extnamever, type, intname) \
+ extern __attribute__((__symver__(extnamever))) \
+ LZMA_API(type) intname
+# else
+# define LZMA_SYMVER_API(extnamever, type, intname) \
+ __asm__(".symver " #intname "," extnamever); \
+ extern LZMA_API(type) intname
+# endif
+#endif
+
// These allow helping the compiler in some often-executed branches, whose
// result is almost always the same.
#ifdef __GNUC__
@@ -67,14 +120,15 @@
#define LZMA_FILTER_RESERVED_START (LZMA_VLI_C(1) << 62)
-/// Supported flags that can be passed to lzma_stream_decoder()
-/// or lzma_auto_decoder().
+/// Supported flags that can be passed to lzma_stream_decoder(),
+/// lzma_auto_decoder(), or lzma_stream_decoder_mt().
#define LZMA_SUPPORTED_FLAGS \
( LZMA_TELL_NO_CHECK \
| LZMA_TELL_UNSUPPORTED_CHECK \
| LZMA_TELL_ANY_CHECK \
| LZMA_IGNORE_CHECK \
- | LZMA_CONCATENATED )
+ | LZMA_CONCATENATED \
+ | LZMA_FAIL_FAST )
/// Largest valid lzma_action value as unsigned integer.
@@ -83,9 +137,12 @@
/// Special return value (lzma_ret) to indicate that a timeout was reached
/// and lzma_code() must not return LZMA_BUF_ERROR. This is converted to
-/// LZMA_OK in lzma_code(). This is not in the lzma_ret enumeration because
-/// there's no need to have it in the public API.
-#define LZMA_TIMED_OUT 32
+/// LZMA_OK in lzma_code().
+#define LZMA_TIMED_OUT LZMA_RET_INTERNAL1
+
+/// Special return value (lzma_ret) for use in stream_decoder_mt.c to
+/// indicate Index was detected instead of a Block Header.
+#define LZMA_INDEX_DETECTED LZMA_RET_INTERNAL2
typedef struct lzma_next_coder_s lzma_next_coder;
@@ -118,8 +175,11 @@ typedef void (*lzma_end_function)(
/// an array of lzma_filter_info structures. This array is used with
/// lzma_next_filter_init to initialize the filter chain.
struct lzma_filter_info_s {
- /// Filter ID. This is used only by the encoder
- /// with lzma_filters_update().
+ /// Filter ID. This can be used to share the same initialization
+ /// function *and* data structures with different Filter IDs
+ /// (LZMA_FILTER_LZMA1EXT does it), and also by the encoder
+ /// with lzma_filters_update() if the filter chain is updated
+ /// in the middle of a raw stream or Block (LZMA_SYNC_FLUSH).
lzma_vli id;
/// Pointer to function used to initialize the filter.
@@ -173,6 +233,16 @@ struct lzma_next_coder_s {
lzma_ret (*update)(void *coder, const lzma_allocator *allocator,
const lzma_filter *filters,
const lzma_filter *reversed_filters);
+
+ /// Set how many bytes of output this coder may produce at maximum.
+ /// On success LZMA_OK must be returned.
+ /// If the filter chain as a whole cannot support this feature,
+ /// this must return LZMA_OPTIONS_ERROR.
+ /// If no input has been given to the coder and the requested limit
+ /// is too small, this must return LZMA_BUF_ERROR. If input has been
+ /// seen, LZMA_OK is allowed too.
+ lzma_ret (*set_out_limit)(void *coder, uint64_t *uncomp_size,
+ uint64_t out_limit);
};
@@ -188,6 +258,7 @@ struct lzma_next_coder_s {
.get_check = NULL, \
.memconfig = NULL, \
.update = NULL, \
+ .set_out_limit = NULL, \
}
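The LZMA_SEEK_NEEDED return value wired into lzma_code() above is produced by the new file-info decoder in file_info.c below. Here is a hedged caller-side sketch of driving it with stdio; error handling is simplified, and fseeko() availability and the helper name are assumptions.

    #include <lzma.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Sketch only: collect the combined lzma_index of an .xz file.
     * On success, *index_out must later be freed with lzma_index_end(). */
    static lzma_ret
    load_xz_index(FILE *f, uint64_t file_size, lzma_index **index_out)
    {
        lzma_stream strm = LZMA_STREAM_INIT;
        lzma_ret ret = lzma_file_info_decoder(&strm, index_out,
                UINT64_MAX, file_size);
        if (ret != LZMA_OK)
            return ret;

        uint8_t buf[BUFSIZ];

        do {
            if (strm.avail_in == 0) {
                strm.next_in = buf;
                strm.avail_in = fread(buf, 1, sizeof(buf), f);
            }

            ret = lzma_code(&strm, LZMA_RUN);

            if (ret == LZMA_SEEK_NEEDED) {
                /* The decoder stored the absolute target offset
                 * in strm.seek_pos; discard the unread input. */
                if (fseeko(f, (off_t)strm.seek_pos, SEEK_SET) != 0)
                    break;
                strm.avail_in = 0;
            }
        } while (ret == LZMA_OK || ret == LZMA_SEEK_NEEDED);

        lzma_end(&strm);
        return ret == LZMA_STREAM_END ? LZMA_OK : ret;
    }
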
diff --git a/contrib/libs/lzma/liblzma/common/file_info.c b/contrib/libs/lzma/liblzma/common/file_info.c
new file mode 100644
index 0000000000..a6b7e145ae
--- /dev/null
+++ b/contrib/libs/lzma/liblzma/common/file_info.c
@@ -0,0 +1,855 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file file_info.c
+/// \brief Decode .xz file information into a lzma_index structure
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "index_decoder.h"
+
+
+typedef struct {
+ enum {
+ SEQ_MAGIC_BYTES,
+ SEQ_PADDING_SEEK,
+ SEQ_PADDING_DECODE,
+ SEQ_FOOTER,
+ SEQ_INDEX_INIT,
+ SEQ_INDEX_DECODE,
+ SEQ_HEADER_DECODE,
+ SEQ_HEADER_COMPARE,
+ } sequence;
+
+ /// Absolute position of in[*in_pos] in the file. All code that
+ /// modifies *in_pos also updates this. seek_to_pos() needs this
+ /// to determine if we need to request the application to seek for
+ /// us or if we can do the seeking internally by adjusting *in_pos.
+ uint64_t file_cur_pos;
+
+ /// This refers to absolute positions of interesting parts of the
+ /// input file. Sometimes it points to the *beginning* of a specific
+ /// field and sometimes to the *end* of a field. The current target
+ /// position at each moment is explained in the comments.
+ uint64_t file_target_pos;
+
+ /// Size of the .xz file (from the application).
+ uint64_t file_size;
+
+ /// Index decoder
+ lzma_next_coder index_decoder;
+
+ /// Number of bytes remaining in the Index field that is currently
+ /// being decoded.
+ lzma_vli index_remaining;
+
+ /// The Index decoder will store the decoded Index in this pointer.
+ lzma_index *this_index;
+
+ /// Amount of Stream Padding in the current Stream.
+ lzma_vli stream_padding;
+
+ /// The final combined index is collected here.
+ lzma_index *combined_index;
+
+ /// Pointer from the application where to store the index information
+ /// after successful decoding.
+ lzma_index **dest_index;
+
+ /// Pointer to lzma_stream.seek_pos to be used when returning
+ /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
+ uint64_t *external_seek_pos;
+
+ /// Memory usage limit
+ uint64_t memlimit;
+
+ /// Stream Flags from the very beginning of the file.
+ lzma_stream_flags first_header_flags;
+
+ /// Stream Flags from Stream Header of the current Stream.
+ lzma_stream_flags header_flags;
+
+ /// Stream Flags from Stream Footer of the current Stream.
+ lzma_stream_flags footer_flags;
+
+ size_t temp_pos;
+ size_t temp_size;
+ uint8_t temp[8192];
+
+} lzma_file_info_coder;
+
+
+/// Copies data from in[*in_pos] into coder->temp until
+/// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
+/// in sync with *in_pos. Returns true if more input is needed.
+static bool
+fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
+ size_t *restrict in_pos, size_t in_size)
+{
+ coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
+ coder->temp, &coder->temp_pos, coder->temp_size);
+ return coder->temp_pos < coder->temp_size;
+}
+
+
+/// Seeks to the absolute file position specified by target_pos.
+/// This tries to do the seeking by only modifying *in_pos, if possible.
+/// The main benefit of this is that if one passes the whole file at once
+/// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
+/// as all the seeking can be done by adjusting *in_pos in this function.
+///
+/// Returns true if an external seek is needed and the caller must return
+/// LZMA_SEEK_NEEDED.
+static bool
+seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
+ size_t in_start, size_t *in_pos, size_t in_size)
+{
+ // The input buffer doesn't extend beyond the end of the file.
+ // This has been checked by file_info_decode() already.
+ assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
+
+ const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
+ const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
+
+ bool external_seek_needed;
+
+ if (target_pos >= pos_min && target_pos <= pos_max) {
+ // The requested position is available in the current input
+ // buffer or right after it. That is, in a corner case we
+ // end up setting *in_pos == in_size and thus will immediately
+ // need new input bytes from the application.
+ *in_pos += (size_t)(target_pos - coder->file_cur_pos);
+ external_seek_needed = false;
+ } else {
+ // Ask the application to seek the input file.
+ *coder->external_seek_pos = target_pos;
+ external_seek_needed = true;
+
+ // Mark the whole input buffer as used. This way
+ // lzma_stream.total_in will have a better estimate
+ // of the amount of data read. It still won't be perfect
+ // as the value will depend on the input buffer size that
+ // the application uses, but it should be good enough for
+ // those few who want an estimate.
+ *in_pos = in_size;
+ }
+
+ // After seeking (internal or external) the current position
+ // will match the requested target position.
+ coder->file_cur_pos = target_pos;
+
+ return external_seek_needed;
+}
+
+
+/// The caller sets coder->file_target_pos so that it points to the *end*
+/// of the desired file position. This function then determines how far
+/// backwards from that position we can seek. After seeking fill_temp()
+/// can be used to read data into coder->temp. When fill_temp() has finished,
+/// coder->temp[coder->temp_size] will match coder->file_target_pos.
+///
+/// This also validates that coder->file_target_pos is sane in the sense that
+/// we aren't trying to seek too far backwards (too close or beyond the
+/// beginning of the file).
+static lzma_ret
+reverse_seek(lzma_file_info_coder *coder,
+ size_t in_start, size_t *in_pos, size_t in_size)
+{
+ // Check that there is enough data before the target position
+ // to contain at least Stream Header and Stream Footer. If there
+ // isn't, the file cannot be valid.
+ if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
+ return LZMA_DATA_ERROR;
+
+ coder->temp_pos = 0;
+
+ // The Stream Header at the very beginning of the file gets handled
+ // specially in SEQ_MAGIC_BYTES and thus we will never need to seek
+ // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
+ // we avoid a useless external seek after SEQ_MAGIC_BYTES if the
+ // application uses an extremely small input buffer and the input
+ // file is very small.
+ if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
+ < sizeof(coder->temp))
+ coder->temp_size = (size_t)(coder->file_target_pos
+ - LZMA_STREAM_HEADER_SIZE);
+ else
+ coder->temp_size = sizeof(coder->temp);
+
+ // The above if-statements guarantee this. This is important because
+ // the Stream Header/Footer decoders assume that there's at least
+ // LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
+ assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
+
+ if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
+ in_start, in_pos, in_size))
+ return LZMA_SEEK_NEEDED;
+
+ return LZMA_OK;
+}
+
+
+/// Gets the number of zero-bytes at the end of the buffer.
+static size_t
+get_padding_size(const uint8_t *buf, size_t buf_size)
+{
+ size_t padding = 0;
+ while (buf_size > 0 && buf[--buf_size] == 0x00)
+ ++padding;
+
+ return padding;
+}
+
+
+/// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
+/// is used to tell the application that Magic Bytes didn't match. In other
+/// Stream Header/Footer fields (in the middle/end of the file) it could be
+/// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
+/// is a valid Stream Header at the beginning of the file. For those cases
+/// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
+static lzma_ret
+hide_format_error(lzma_ret ret)
+{
+ if (ret == LZMA_FORMAT_ERROR)
+ ret = LZMA_DATA_ERROR;
+
+ return ret;
+}
+
+
+/// Calls the Index decoder and updates coder->index_remaining.
+/// This is a separate function because the input can be either directly
+/// from the application or from coder->temp.
+static lzma_ret
+decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, bool update_file_cur_pos)
+{
+ const size_t in_start = *in_pos;
+
+ const lzma_ret ret = coder->index_decoder.code(
+ coder->index_decoder.coder,
+ allocator, in, in_pos, in_size,
+ NULL, NULL, 0, LZMA_RUN);
+
+ coder->index_remaining -= *in_pos - in_start;
+
+ if (update_file_cur_pos)
+ coder->file_cur_pos += *in_pos - in_start;
+
+ return ret;
+}
+
+
+static lzma_ret
+file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size,
+ uint8_t *restrict out lzma_attribute((__unused__)),
+ size_t *restrict out_pos lzma_attribute((__unused__)),
+ size_t out_size lzma_attribute((__unused__)),
+ lzma_action action lzma_attribute((__unused__)))
+{
+ lzma_file_info_coder *coder = coder_ptr;
+ const size_t in_start = *in_pos;
+
+ // If the caller provides input past the end of the file, trim
+ // the extra bytes from the buffer so that we won't read too far.
+ assert(coder->file_size >= coder->file_cur_pos);
+ if (coder->file_size - coder->file_cur_pos < in_size - in_start)
+ in_size = in_start
+ + (size_t)(coder->file_size - coder->file_cur_pos);
+
+ while (true)
+ switch (coder->sequence) {
+ case SEQ_MAGIC_BYTES:
+ // Decode the Stream Header at the beginning of the file
+ // first to check if the Magic Bytes match. The flags
+ // are stored in coder->first_header_flags so that we
+ // don't need to seek to it again.
+ //
+ // Check that the file is big enough to contain at least
+ // Stream Header.
+ if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
+ return LZMA_FORMAT_ERROR;
+
+ // Read the Stream Header field into coder->temp.
+ if (fill_temp(coder, in, in_pos, in_size))
+ return LZMA_OK;
+
+ // This is the only Stream Header/Footer decoding where we
+ // want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
+ // match. Elsewhere it will be converted to LZMA_DATA_ERROR.
+ return_if_error(lzma_stream_header_decode(
+ &coder->first_header_flags, coder->temp));
+
+ // Now that we know that the Magic Bytes match, check the
+ // file size. It's better to do this here after checking the
+ // Magic Bytes since this way we can give LZMA_FORMAT_ERROR
+ // instead of LZMA_DATA_ERROR when the Magic Bytes don't
+ // match in a file that is too big or isn't a multiple of
+ // four bytes.
+ if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
+ return LZMA_DATA_ERROR;
+
+ // Start looking for Stream Padding and Stream Footer
+ // at the end of the file.
+ coder->file_target_pos = coder->file_size;
+
+ // Fall through
+
+ case SEQ_PADDING_SEEK:
+ coder->sequence = SEQ_PADDING_DECODE;
+ return_if_error(reverse_seek(
+ coder, in_start, in_pos, in_size));
+
+ // Fall through
+
+ case SEQ_PADDING_DECODE: {
+ // Copy to coder->temp first. This keeps the code simpler if
+ // the application only provides input a few bytes at a time.
+ if (fill_temp(coder, in, in_pos, in_size))
+ return LZMA_OK;
+
+ // Scan the buffer backwards to get the size of the
+ // Stream Padding field (if any).
+ const size_t new_padding = get_padding_size(
+ coder->temp, coder->temp_size);
+ coder->stream_padding += new_padding;
+
+ // Set the target position to the beginning of Stream Padding
+ // that has been observed so far. If all Stream Padding has
+ // been seen, then the target position will be at the end
+ // of the Stream Footer field.
+ coder->file_target_pos -= new_padding;
+
+ if (new_padding == coder->temp_size) {
+ // The whole buffer was padding. Seek backwards in
+ // the file to get more input.
+ coder->sequence = SEQ_PADDING_SEEK;
+ break;
+ }
+
+ // Size of Stream Padding must be a multiple of 4 bytes.
+ if (coder->stream_padding & 3)
+ return LZMA_DATA_ERROR;
+
+ coder->sequence = SEQ_FOOTER;
+
+ // Calculate the amount of non-padding data in coder->temp.
+ coder->temp_size -= new_padding;
+ coder->temp_pos = coder->temp_size;
+
+ // We can avoid an external seek if the whole Stream Footer
+ // is already in coder->temp. In that case SEQ_FOOTER won't
+ // read more input and will find the Stream Footer from
+ // coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
+ //
+ // Otherwise we will need to seek. The seeking is done so
+ // that Stream Footer will be at the end of coder->temp.
+ // This way it's likely that we also get a complete Index
+ // field into coder->temp without needing a separate seek
+ // for that (unless the Index field is big).
+ if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
+ return_if_error(reverse_seek(
+ coder, in_start, in_pos, in_size));
+ }
+
+ // Fall through
+
+ case SEQ_FOOTER:
+ // Copy the Stream Footer field into coder->temp.
+ // If Stream Footer was already available in coder->temp
+ // in SEQ_PADDING_DECODE, then this does nothing.
+ if (fill_temp(coder, in, in_pos, in_size))
+ return LZMA_OK;
+
+ // Make coder->file_target_pos and coder->temp_size point
+ // to the beginning of Stream Footer and thus to the end
+ // of the Index field. coder->temp_pos will be updated
+ // a bit later.
+ coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
+ coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
+
+ // Decode Stream Footer.
+ return_if_error(hide_format_error(lzma_stream_footer_decode(
+ &coder->footer_flags,
+ coder->temp + coder->temp_size)));
+
+ // Check that we won't seek past the beginning of the file.
+ //
+ // LZMA_STREAM_HEADER_SIZE is added because there must be
+ // space for Stream Header too even though we won't seek
+ // there before decoding the Index field.
+ //
+ // There's no risk of integer overflow here because
+ // Backward Size cannot be greater than 2^34.
+ if (coder->file_target_pos < coder->footer_flags.backward_size
+ + LZMA_STREAM_HEADER_SIZE)
+ return LZMA_DATA_ERROR;
+
+ // Set the target position to the beginning of the Index field.
+ coder->file_target_pos -= coder->footer_flags.backward_size;
+ coder->sequence = SEQ_INDEX_INIT;
+
+ // We can avoid an external seek if the whole Index field is
+ // already available in coder->temp.
+ if (coder->temp_size >= coder->footer_flags.backward_size) {
+ // Set coder->temp_pos to point to the beginning
+ // of the Index.
+ coder->temp_pos = coder->temp_size
+ - coder->footer_flags.backward_size;
+ } else {
+ // These are set to zero to indicate that there's no
+ // useful data (Index or anything else) in coder->temp.
+ coder->temp_pos = 0;
+ coder->temp_size = 0;
+
+ // Seek to the beginning of the Index field.
+ if (seek_to_pos(coder, coder->file_target_pos,
+ in_start, in_pos, in_size))
+ return LZMA_SEEK_NEEDED;
+ }
+
+ // Fall through
+
+ case SEQ_INDEX_INIT: {
+ // Calculate the amount of memory already used by the earlier
+ // Indexes so that we know how big memory limit to pass to
+ // the Index decoder.
+ //
+ // NOTE: When there are multiple Streams, the separate
+ // lzma_index structures can use more RAM (as measured by
+ // lzma_index_memused()) than the final combined lzma_index.
+ // Thus memlimit may need to be slightly higher than the final
+ // calculated memory usage will be. This is perhaps a bit
+ // confusing to the application, but I think it shouldn't
+ // cause problems in practice.
+ uint64_t memused = 0;
+ if (coder->combined_index != NULL) {
+ memused = lzma_index_memused(coder->combined_index);
+ assert(memused <= coder->memlimit);
+ if (memused > coder->memlimit) // Extra sanity check
+ return LZMA_PROG_ERROR;
+ }
+
+ // Initialize the Index decoder.
+ return_if_error(lzma_index_decoder_init(
+ &coder->index_decoder, allocator,
+ &coder->this_index,
+ coder->memlimit - memused));
+
+ coder->index_remaining = coder->footer_flags.backward_size;
+ coder->sequence = SEQ_INDEX_DECODE;
+ }
+
+ // Fall through
+
+ case SEQ_INDEX_DECODE: {
+ // Decode (a part of) the Index. If the whole Index is already
+ // in coder->temp, read it from there. Otherwise read from
+ // in[*in_pos] onwards. Note that decode_index() updates
+ // coder->index_remaining and optionally coder->file_cur_pos.
+ lzma_ret ret;
+ if (coder->temp_size != 0) {
+ assert(coder->temp_size - coder->temp_pos
+ == coder->index_remaining);
+ ret = decode_index(coder, allocator, coder->temp,
+ &coder->temp_pos, coder->temp_size,
+ false);
+ } else {
+ // Don't give the decoder more input than the known
+ // remaining size of the Index field.
+ size_t in_stop = in_size;
+ if (in_size - *in_pos > coder->index_remaining)
+ in_stop = *in_pos
+ + (size_t)(coder->index_remaining);
+
+ ret = decode_index(coder, allocator,
+ in, in_pos, in_stop, true);
+ }
+
+ switch (ret) {
+ case LZMA_OK:
+ // If the Index decoder asks for more input when we
+ // have already given it as much input as Backward Size
+ // indicated, the file is invalid.
+ if (coder->index_remaining == 0)
+ return LZMA_DATA_ERROR;
+
+ // We cannot get here if we were reading Index from
+ // coder->temp because when reading from coder->temp
+ // we give the Index decoder exactly
+ // coder->index_remaining bytes of input.
+ assert(coder->temp_size == 0);
+
+ return LZMA_OK;
+
+ case LZMA_STREAM_END:
+ // If the decoding seems to be successful, check also
+ // that the Index decoder consumed as much input as
+ // indicated by the Backward Size field.
+ if (coder->index_remaining != 0)
+ return LZMA_DATA_ERROR;
+
+ break;
+
+ default:
+ return ret;
+ }
+
+ // Calculate how much the Index tells us to seek backwards
+ // (relative to the beginning of the Index): Total size of
+ // all Blocks plus the size of the Stream Header field.
+ // No integer overflow here because lzma_index_total_size()
+ // cannot return a value greater than LZMA_VLI_MAX.
+ const uint64_t seek_amount
+ = lzma_index_total_size(coder->this_index)
+ + LZMA_STREAM_HEADER_SIZE;
+
+ // Check that the Index is sane in the sense that seek_amount won't
+ // make us seek past the beginning of the file when locating
+ // the Stream Header.
+ //
+ // coder->file_target_pos still points to the beginning of
+ // the Index field.
+ if (coder->file_target_pos < seek_amount)
+ return LZMA_DATA_ERROR;
+
+ // Set the target to the beginning of Stream Header.
+ coder->file_target_pos -= seek_amount;
+
+ if (coder->file_target_pos == 0) {
+ // We would seek to the beginning of the file, but
+ // since we already decoded that Stream Header in
+ // SEQ_MAGIC_BYTES, we can use the cached value from
+ // coder->first_header_flags to avoid the seek.
+ coder->header_flags = coder->first_header_flags;
+ coder->sequence = SEQ_HEADER_COMPARE;
+ break;
+ }
+
+ coder->sequence = SEQ_HEADER_DECODE;
+
+ // Make coder->file_target_pos point to the end of
+ // the Stream Header field.
+ coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;
+
+ // If coder->temp_size is non-zero, it points to the end
+ // of the Index field. Then the beginning of the Index
+ // field is at coder->temp[coder->temp_size
+ // - coder->footer_flags.backward_size].
+ assert(coder->temp_size == 0 || coder->temp_size
+ >= coder->footer_flags.backward_size);
+
+ // If coder->temp contained the whole Index, see if it has
+ // enough data to contain also the Stream Header. If so,
+ // we avoid an external seek.
+ //
+ // NOTE: This can happen only with small .xz files and only
+ // for the non-first Stream as the Stream Flags of the first
+ // Stream are cached and already handled a few lines above.
+ // So this isn't as useful as the other seek-avoidance cases.
+ if (coder->temp_size != 0 && coder->temp_size
+ - coder->footer_flags.backward_size
+ >= seek_amount) {
+ // Make temp_pos and temp_size point to the *end* of
+ // Stream Header so that SEQ_HEADER_DECODE will find
+ // the start of Stream Header from coder->temp[
+ // coder->temp_size - LZMA_STREAM_HEADER_SIZE].
+ coder->temp_pos = coder->temp_size
+ - coder->footer_flags.backward_size
+ - seek_amount
+ + LZMA_STREAM_HEADER_SIZE;
+ coder->temp_size = coder->temp_pos;
+ } else {
+ // Seek so that Stream Header will be at the end of
+ // coder->temp. With typical multi-Stream files we
+ // will usually also get the Stream Footer and Index
+ // of the *previous* Stream in coder->temp and thus
+ // won't need a separate seek for them.
+ return_if_error(reverse_seek(coder,
+ in_start, in_pos, in_size));
+ }
+ }
+
+ // Fall through
+
+ case SEQ_HEADER_DECODE:
+ // Copy the Stream Header field into coder->temp.
+ // If Stream Header was already available in coder->temp
+ // in SEQ_INDEX_DECODE, then this does nothing.
+ if (fill_temp(coder, in, in_pos, in_size))
+ return LZMA_OK;
+
+ // Make all these point to the beginning of Stream Header.
+ coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
+ coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
+ coder->temp_pos = coder->temp_size;
+
+ // Decode the Stream Header.
+ return_if_error(hide_format_error(lzma_stream_header_decode(
+ &coder->header_flags,
+ coder->temp + coder->temp_size)));
+
+ coder->sequence = SEQ_HEADER_COMPARE;
+
+ // Fall through
+
+ case SEQ_HEADER_COMPARE:
+ // Compare Stream Header against Stream Footer. They must
+ // match.
+ return_if_error(lzma_stream_flags_compare(
+ &coder->header_flags, &coder->footer_flags));
+
+ // Store the decoded Stream Flags into the Index. Use the
+ // Footer Flags because it contains Backward Size, although
+ // it shouldn't matter in practice.
+ if (lzma_index_stream_flags(coder->this_index,
+ &coder->footer_flags) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ // Store also the size of the Stream Padding field. It is
+ // needed to calculate the offsets of the Streams correctly.
+ if (lzma_index_stream_padding(coder->this_index,
+ coder->stream_padding) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ // Reset it so that it's ready for the next Stream.
+ coder->stream_padding = 0;
+
+ // Append the earlier decoded Indexes after this_index.
+ if (coder->combined_index != NULL)
+ return_if_error(lzma_index_cat(coder->this_index,
+ coder->combined_index, allocator));
+
+ coder->combined_index = coder->this_index;
+ coder->this_index = NULL;
+
+ // If the whole file was decoded, tell the caller that we
+ // are finished.
+ if (coder->file_target_pos == 0) {
+ // The combined index must indicate the same file
+ // size as was told to us at initialization.
+ assert(lzma_index_file_size(coder->combined_index)
+ == coder->file_size);
+
+ // Make the combined index available to
+ // the application.
+ *coder->dest_index = coder->combined_index;
+ coder->combined_index = NULL;
+
+ // Mark the input buffer as used since we may have
+ // done internal seeking and thus don't know how
+ // many input bytes were actually used. This way
+ // lzma_stream.total_in gets a slightly better
+ // estimate of the amount of input used.
+ *in_pos = in_size;
+ return LZMA_STREAM_END;
+ }
+
+ // We didn't hit the beginning of the file yet, so continue
+ // reading backwards in the file. If we have unprocessed
+ // data in coder->temp, use it before requesting more data
+ // from the application.
+ //
+ // coder->file_target_pos, coder->temp_size, and
+ // coder->temp_pos all point to the beginning of Stream Header
+ // and thus the end of the previous Stream in the file.
+ coder->sequence = coder->temp_size > 0
+ ? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
+ break;
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+}
+
+
+static lzma_ret
+file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+ lzma_file_info_coder *coder = coder_ptr;
+
+ // The memory usage calculation comes from three things:
+ //
+ // (1) The Indexes that have already been decoded and processed into
+ // coder->combined_index.
+ //
+ // (2) The latest Index in coder->this_index that has been decoded but
+ // not yet put into coder->combined_index.
+ //
+ // (3) The latest Index that we have started decoding but haven't
+ // finished and thus isn't available in coder->this_index yet.
+ // Memory usage and limit information needs to be communicated
+ // from/to coder->index_decoder.
+ //
+ // Care has to be taken to not do both (2) and (3) when calculating
+ // the memory usage.
+ uint64_t combined_index_memusage = 0;
+ uint64_t this_index_memusage = 0;
+
+ // (1) If we have already successfully decoded one or more Indexes,
+ // get their memory usage.
+ if (coder->combined_index != NULL)
+ combined_index_memusage = lzma_index_memused(
+ coder->combined_index);
+
+ // Choose between (2), (3), or neither.
+ if (coder->this_index != NULL) {
+ // (2) The latest Index is available. Use its memory usage.
+ this_index_memusage = lzma_index_memused(coder->this_index);
+
+ } else if (coder->sequence == SEQ_INDEX_DECODE) {
+		// (3) The Index decoder is active and hasn't yet stored
+ // the new index in coder->this_index. Get the memory usage
+ // information from the Index decoder.
+ //
+ // NOTE: If the Index decoder doesn't yet know how much memory
+ // it will eventually need, it will return a tiny value here.
+ uint64_t dummy;
+ if (coder->index_decoder.memconfig(coder->index_decoder.coder,
+ &this_index_memusage, &dummy, 0)
+ != LZMA_OK) {
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+ }
+
+ // Now we know the total memory usage/requirement. If we had neither
+ // old Indexes nor a new Index, this will be zero which isn't
+ // acceptable as lzma_memusage() has to return non-zero on success
+ // and even with an empty .xz file we will end up with a lzma_index
+ // that takes some memory.
+ *memusage = combined_index_memusage + this_index_memusage;
+ if (*memusage == 0)
+ *memusage = lzma_index_memusage(1, 0);
+
+ *old_memlimit = coder->memlimit;
+
+ // If requested, set a new memory usage limit.
+ if (new_memlimit != 0) {
+ if (new_memlimit < *memusage)
+ return LZMA_MEMLIMIT_ERROR;
+
+ // In the condition (3) we need to tell the Index decoder
+ // its new memory usage limit.
+ if (coder->this_index == NULL
+ && coder->sequence == SEQ_INDEX_DECODE) {
+ const uint64_t idec_new_memlimit = new_memlimit
+ - combined_index_memusage;
+
+ assert(this_index_memusage > 0);
+ assert(idec_new_memlimit > 0);
+
+ uint64_t dummy1;
+ uint64_t dummy2;
+
+ if (coder->index_decoder.memconfig(
+ coder->index_decoder.coder,
+ &dummy1, &dummy2, idec_new_memlimit)
+ != LZMA_OK) {
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+ }
+
+ coder->memlimit = new_memlimit;
+ }
+
+ return LZMA_OK;
+}
+
+
+static void
+file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_file_info_coder *coder = coder_ptr;
+
+ lzma_next_end(&coder->index_decoder, allocator);
+ lzma_index_end(coder->this_index, allocator);
+ lzma_index_end(coder->combined_index, allocator);
+
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+lzma_file_info_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator, uint64_t *seek_pos,
+ lzma_index **dest_index,
+ uint64_t memlimit, uint64_t file_size)
+{
+ lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
+
+ if (dest_index == NULL)
+ return LZMA_PROG_ERROR;
+
+ lzma_file_info_coder *coder = next->coder;
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &file_info_decode;
+ next->end = &file_info_decoder_end;
+ next->memconfig = &file_info_decoder_memconfig;
+
+ coder->index_decoder = LZMA_NEXT_CODER_INIT;
+ coder->this_index = NULL;
+ coder->combined_index = NULL;
+ }
+
+ coder->sequence = SEQ_MAGIC_BYTES;
+ coder->file_cur_pos = 0;
+ coder->file_target_pos = 0;
+ coder->file_size = file_size;
+
+ lzma_index_end(coder->this_index, allocator);
+ coder->this_index = NULL;
+
+ lzma_index_end(coder->combined_index, allocator);
+ coder->combined_index = NULL;
+
+ coder->stream_padding = 0;
+
+ coder->dest_index = dest_index;
+ coder->external_seek_pos = seek_pos;
+
+ // If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
+ // won't return 0 (which would indicate an error).
+ coder->memlimit = my_max(1, memlimit);
+
+ // Prepare these for reading the first Stream Header into coder->temp.
+ coder->temp_pos = 0;
+ coder->temp_size = LZMA_STREAM_HEADER_SIZE;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
+ uint64_t memlimit, uint64_t file_size)
+{
+ lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
+ dest_index, memlimit, file_size);
+
+ // We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
+ // lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
+ // combination in a sane way. Applications still need to be careful
+ // if they use LZMA_FINISH so that they remember to reset it back
+ // to LZMA_RUN after seeking if needed.
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
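
A minimal usage sketch of the lzma_file_info_decoder() API added above, assuming a seekable stdio FILE (and ignoring large-file fseek() limits); the buffer size and the helper name read_file_index are illustrative, not part of liblzma:

	#include <stdint.h>
	#include <stdio.h>
	#include <lzma.h>

	// Decode the combined Index of an .xz file; returns NULL on error.
	// The caller frees the result with lzma_index_end(idx, NULL).
	static lzma_index *
	read_file_index(FILE *file, uint64_t file_size)
	{
		lzma_stream strm = LZMA_STREAM_INIT;
		lzma_index *idx = NULL;
		uint8_t buf[8192];

		if (lzma_file_info_decoder(&strm, &idx, UINT64_MAX, file_size)
				!= LZMA_OK)
			return NULL;

		for (;;) {
			if (strm.avail_in == 0) {
				strm.next_in = buf;
				strm.avail_in = fread(buf, 1, sizeof(buf), file);
			}

			const lzma_ret ret = lzma_code(&strm, LZMA_RUN);

			if (ret == LZMA_SEEK_NEEDED) {
				// Seek as requested and drop the buffered input.
				if (fseek(file, (long)strm.seek_pos, SEEK_SET) != 0)
					break;
				strm.avail_in = 0;
			} else if (ret == LZMA_STREAM_END) {
				// *idx now holds the combined lzma_index.
				lzma_end(&strm);
				return idx;
			} else if (ret != LZMA_OK) {
				break;
			}
		}

		lzma_end(&strm);
		return NULL;
	}

The key point is that on LZMA_SEEK_NEEDED the application must seek to strm.seek_pos and discard any buffered input before calling lzma_code() again.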
diff --git a/contrib/libs/lzma/liblzma/common/filter_common.c b/contrib/libs/lzma/liblzma/common/filter_common.c
index 9ad5d5d8e2..fa0927cf9b 100644
--- a/contrib/libs/lzma/liblzma/common/filter_common.c
+++ b/contrib/libs/lzma/liblzma/common/filter_common.c
@@ -42,6 +42,13 @@ static const struct {
.last_ok = true,
.changes_size = true,
},
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .options_size = sizeof(lzma_options_lzma),
+ .non_last_ok = false,
+ .last_ok = true,
+ .changes_size = true,
+ },
#endif
#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
{
@@ -97,6 +104,15 @@ static const struct {
.changes_size = false,
},
#endif
+#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
+ {
+ .id = LZMA_FILTER_ARM64,
+ .options_size = sizeof(lzma_options_bcj),
+ .non_last_ok = true,
+ .last_ok = false,
+ .changes_size = false,
+ },
+#endif
#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
{
.id = LZMA_FILTER_SPARC,
@@ -122,12 +138,16 @@ static const struct {
extern LZMA_API(lzma_ret)
-lzma_filters_copy(const lzma_filter *src, lzma_filter *dest,
+lzma_filters_copy(const lzma_filter *src, lzma_filter *real_dest,
const lzma_allocator *allocator)
{
- if (src == NULL || dest == NULL)
+ if (src == NULL || real_dest == NULL)
return LZMA_PROG_ERROR;
+ // Use a temporary destination so that the real destination
+	// will never be modified if an error occurs.
+ lzma_filter dest[LZMA_FILTERS_MAX + 1];
+
lzma_ret ret;
size_t i;
for (i = 0; src[i].id != LZMA_VLI_UNKNOWN; ++i) {
@@ -173,25 +193,53 @@ lzma_filters_copy(const lzma_filter *src, lzma_filter *dest,
}
// Terminate the filter array.
- assert(i <= LZMA_FILTERS_MAX + 1);
+ assert(i < LZMA_FILTERS_MAX + 1);
dest[i].id = LZMA_VLI_UNKNOWN;
dest[i].options = NULL;
+ // Copy it to the caller-supplied array now that we know that
+ // no errors occurred.
+ memcpy(real_dest, dest, (i + 1) * sizeof(lzma_filter));
+
return LZMA_OK;
error:
// Free the options which we have already allocated.
- while (i-- > 0) {
+ while (i-- > 0)
lzma_free(dest[i].options, allocator);
- dest[i].options = NULL;
- }
return ret;
}
-static lzma_ret
-validate_chain(const lzma_filter *filters, size_t *count)
+extern LZMA_API(void)
+lzma_filters_free(lzma_filter *filters, const lzma_allocator *allocator)
+{
+ if (filters == NULL)
+ return;
+
+ for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
+ if (i == LZMA_FILTERS_MAX) {
+ // The API says that LZMA_FILTERS_MAX + 1 is the
+ // maximum allowed size including the terminating
+ // element. Thus, we should never get here but in
+ // case there is a bug and we do anyway, don't go
+ // past the (probable) end of the array.
+ assert(0);
+ break;
+ }
+
+ lzma_free(filters[i].options, allocator);
+ filters[i].options = NULL;
+ filters[i].id = LZMA_VLI_UNKNOWN;
+ }
+
+ return;
+}
+
+
+extern lzma_ret
+lzma_validate_chain(const lzma_filter *filters, size_t *count)
{
// There must be at least one filter.
if (filters == NULL || filters[0].id == LZMA_VLI_UNKNOWN)
@@ -245,7 +293,7 @@ lzma_raw_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
{
// Do some basic validation and get the number of filters.
size_t count;
- return_if_error(validate_chain(options, &count));
+ return_if_error(lzma_validate_chain(options, &count));
// Set the filter functions and copy the options pointer.
lzma_filter_info filters[LZMA_FILTERS_MAX + 1];
@@ -298,7 +346,7 @@ lzma_raw_coder_memusage(lzma_filter_find coder_find,
// The chain has to have at least one filter.
{
size_t tmp;
- if (validate_chain(filters, &tmp) != LZMA_OK)
+ if (lzma_validate_chain(filters, &tmp) != LZMA_OK)
return UINT64_MAX;
}
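
A short sketch of how the revised lzma_filters_copy() and the new lzma_filters_free() pair up on the application side; the helper name is hypothetical and the default allocator (NULL) is assumed:

	#include <stddef.h>
	#include <lzma.h>

	static lzma_ret
	duplicate_chain_example(const lzma_filter *src)
	{
		// LZMA_FILTERS_MAX + 1 leaves room for the terminating element.
		lzma_filter copy[LZMA_FILTERS_MAX + 1];

		// On error the destination array is left completely untouched.
		const lzma_ret ret = lzma_filters_copy(src, copy, NULL);
		if (ret != LZMA_OK)
			return ret;

		// ... use the copied chain, e.g. pass it to an encoder init ...

		// Free the heap-allocated options and reset the array to a
		// terminated, empty state.
		lzma_filters_free(copy, NULL);
		return LZMA_OK;
	}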
diff --git a/contrib/libs/lzma/liblzma/common/filter_common.h b/contrib/libs/lzma/liblzma/common/filter_common.h
index 9390305c26..2e47bb69f7 100644
--- a/contrib/libs/lzma/liblzma/common/filter_common.h
+++ b/contrib/libs/lzma/liblzma/common/filter_common.h
@@ -35,6 +35,9 @@ typedef struct {
typedef const lzma_filter_coder *(*lzma_filter_find)(lzma_vli id);
+extern lzma_ret lzma_validate_chain(const lzma_filter *filters, size_t *count);
+
+
extern lzma_ret lzma_raw_coder_init(
lzma_next_coder *next, const lzma_allocator *allocator,
const lzma_filter *filters,
diff --git a/contrib/libs/lzma/liblzma/common/filter_decoder.c b/contrib/libs/lzma/liblzma/common/filter_decoder.c
index c75b0a89c3..fa53f5bdba 100644
--- a/contrib/libs/lzma/liblzma/common/filter_decoder.c
+++ b/contrib/libs/lzma/liblzma/common/filter_decoder.c
@@ -50,6 +50,12 @@ static const lzma_filter_decoder decoders[] = {
.memusage = &lzma_lzma_decoder_memusage,
.props_decode = &lzma_lzma_props_decode,
},
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .init = &lzma_lzma_decoder_init,
+ .memusage = &lzma_lzma_decoder_memusage,
+ .props_decode = &lzma_lzma_props_decode,
+ },
#endif
#ifdef HAVE_DECODER_LZMA2
{
@@ -99,6 +105,14 @@ static const lzma_filter_decoder decoders[] = {
.props_decode = &lzma_simple_props_decode,
},
#endif
+#ifdef HAVE_DECODER_ARM64
+ {
+ .id = LZMA_FILTER_ARM64,
+ .init = &lzma_simple_arm64_decoder_init,
+ .memusage = NULL,
+ .props_decode = &lzma_simple_props_decode,
+ },
+#endif
#ifdef HAVE_DECODER_SPARC
{
.id = LZMA_FILTER_SPARC,
diff --git a/contrib/libs/lzma/liblzma/common/filter_encoder.c b/contrib/libs/lzma/liblzma/common/filter_encoder.c
index c5d8f39721..978b7a6bb5 100644
--- a/contrib/libs/lzma/liblzma/common/filter_encoder.c
+++ b/contrib/libs/lzma/liblzma/common/filter_encoder.c
@@ -59,7 +59,16 @@ static const lzma_filter_encoder encoders[] = {
.id = LZMA_FILTER_LZMA1,
.init = &lzma_lzma_encoder_init,
.memusage = &lzma_lzma_encoder_memusage,
- .block_size = NULL, // FIXME
+ .block_size = NULL, // Not needed for LZMA1
+ .props_size_get = NULL,
+ .props_size_fixed = 5,
+ .props_encode = &lzma_lzma_props_encode,
+ },
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .init = &lzma_lzma_encoder_init,
+ .memusage = &lzma_lzma_encoder_memusage,
+ .block_size = NULL, // Not needed for LZMA1
.props_size_get = NULL,
.props_size_fixed = 5,
.props_encode = &lzma_lzma_props_encode,
@@ -70,7 +79,7 @@ static const lzma_filter_encoder encoders[] = {
.id = LZMA_FILTER_LZMA2,
.init = &lzma_lzma2_encoder_init,
.memusage = &lzma_lzma2_encoder_memusage,
- .block_size = &lzma_lzma2_block_size, // FIXME
+ .block_size = &lzma_lzma2_block_size,
.props_size_get = NULL,
.props_size_fixed = 1,
.props_encode = &lzma_lzma2_props_encode,
@@ -126,6 +135,16 @@ static const lzma_filter_encoder encoders[] = {
.props_encode = &lzma_simple_props_encode,
},
#endif
+#ifdef HAVE_ENCODER_ARM64
+ {
+ .id = LZMA_FILTER_ARM64,
+ .init = &lzma_simple_arm64_encoder_init,
+ .memusage = NULL,
+ .block_size = NULL,
+ .props_size_get = &lzma_simple_props_size,
+ .props_encode = &lzma_simple_props_encode,
+ },
+#endif
#ifdef HAVE_ENCODER_SPARC
{
.id = LZMA_FILTER_SPARC,
diff --git a/contrib/libs/lzma/liblzma/common/hardware_cputhreads.c b/contrib/libs/lzma/liblzma/common/hardware_cputhreads.c
index f468366a60..5d246d2cc0 100644
--- a/contrib/libs/lzma/liblzma/common/hardware_cputhreads.c
+++ b/contrib/libs/lzma/liblzma/common/hardware_cputhreads.c
@@ -15,6 +15,18 @@
#include "tuklib_cpucores.h"
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// This is for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+LZMA_SYMVER_API("lzma_cputhreads@XZ_5.2.2",
+ uint32_t, lzma_cputhreads_522)(void) lzma_nothrow
+ __attribute__((__alias__("lzma_cputhreads_52")));
+
+LZMA_SYMVER_API("lzma_cputhreads@@XZ_5.2",
+ uint32_t, lzma_cputhreads_52)(void) lzma_nothrow;
+
+#define lzma_cputhreads lzma_cputhreads_52
+#endif
extern LZMA_API(uint32_t)
lzma_cputhreads(void)
{
diff --git a/contrib/libs/lzma/liblzma/common/index.c b/contrib/libs/lzma/liblzma/common/index.c
index a41e8f3308..24ec3c10c2 100644
--- a/contrib/libs/lzma/liblzma/common/index.c
+++ b/contrib/libs/lzma/liblzma/common/index.c
@@ -656,6 +656,10 @@ lzma_index_append(lzma_index *i, const lzma_allocator *allocator,
const uint32_t index_list_size_add = lzma_vli_size(unpadded_size)
+ lzma_vli_size(uncompressed_size);
+ // Check that uncompressed size will not overflow.
+ if (uncompressed_base + uncompressed_size > LZMA_VLI_MAX)
+ return LZMA_DATA_ERROR;
+
// Check that the file size will stay within limits.
if (index_file_size(s->node.compressed_base,
compressed_base + unpadded_size, s->record_count + 1,
@@ -767,6 +771,9 @@ extern LZMA_API(lzma_ret)
lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
const lzma_allocator *allocator)
{
+ if (dest == NULL || src == NULL)
+ return LZMA_PROG_ERROR;
+
const lzma_vli dest_file_size = lzma_index_file_size(dest);
// Check that we don't exceed the file size limits.
@@ -835,6 +842,11 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
}
}
+ // dest->checks includes the check types of all except the last Stream
+ // in dest. Set the bit for the check type of the last Stream now so
+ // that it won't get lost when Stream(s) from src are appended to dest.
+ dest->checks = lzma_index_checks(dest);
+
// Add all the Streams from src to dest. Update the base offsets
// of each Stream from src.
const index_cat_info info = {
@@ -851,7 +863,7 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
dest->total_size += src->total_size;
dest->record_count += src->record_count;
dest->index_list_size += src->index_list_size;
- dest->checks = lzma_index_checks(dest) | src->checks;
+ dest->checks |= src->checks;
// There's nothing else left in src than the base structure.
lzma_free(src, allocator);
@@ -1226,7 +1238,7 @@ lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target)
// Use binary search to locate the exact Record. It is the first
// Record whose uncompressed_sum is greater than target.
- // This is because we want the rightmost Record that fullfills the
+ // This is because we want the rightmost Record that fulfills the
// search criterion. It is possible that there are empty Blocks;
// we don't want to return them.
size_t left = 0;
diff --git a/contrib/libs/lzma/liblzma/common/index_decoder.c b/contrib/libs/lzma/liblzma/common/index_decoder.c
index cc07a1b8c5..b268988533 100644
--- a/contrib/libs/lzma/liblzma/common/index_decoder.c
+++ b/contrib/libs/lzma/liblzma/common/index_decoder.c
@@ -10,7 +10,7 @@
//
///////////////////////////////////////////////////////////////////////////////
-#include "index.h"
+#include "index_decoder.h"
#include "check.h"
@@ -180,8 +180,11 @@ index_decode(void *coder_ptr, const lzma_allocator *allocator,
return LZMA_OK;
if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
- != in[(*in_pos)++])
+ != in[(*in_pos)++]) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
return LZMA_DATA_ERROR;
+#endif
+ }
} while (++coder->pos < 4);
@@ -265,11 +268,11 @@ index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
}
-static lzma_ret
-index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+extern lzma_ret
+lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
lzma_index **i, uint64_t memlimit)
{
- lzma_next_coder_init(&index_decoder_init, next, allocator);
+ lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
if (i == NULL)
return LZMA_PROG_ERROR;
@@ -296,7 +299,7 @@ index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
extern LZMA_API(lzma_ret)
lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
{
- lzma_next_strm_init(index_decoder_init, strm, i, memlimit);
+ lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
strm->internal->supported_actions[LZMA_RUN] = true;
strm->internal->supported_actions[LZMA_FINISH] = true;
diff --git a/contrib/libs/lzma/liblzma/common/index_decoder.h b/contrib/libs/lzma/liblzma/common/index_decoder.h
new file mode 100644
index 0000000000..1af433b58b
--- /dev/null
+++ b/contrib/libs/lzma/liblzma/common/index_decoder.h
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file index_decoder.h
+/// \brief Decodes the Index field
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_INDEX_DECODER_H
+#define LZMA_INDEX_DECODER_H
+
+#include "index.h"
+
+
+extern lzma_ret lzma_index_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ lzma_index **i, uint64_t memlimit);
+
+
+#endif
diff --git a/contrib/libs/lzma/liblzma/common/index_hash.c b/contrib/libs/lzma/liblzma/common/index_hash.c
index d7a0344b76..34df85d72f 100644
--- a/contrib/libs/lzma/liblzma/common/index_hash.c
+++ b/contrib/libs/lzma/liblzma/common/index_hash.c
@@ -122,7 +122,7 @@ lzma_index_hash_size(const lzma_index_hash *index_hash)
/// Updates the sizes and the hash without any validation.
-static lzma_ret
+static void
hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
lzma_vli uncompressed_size)
{
@@ -136,7 +136,7 @@ hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
lzma_check_update(&info->check, LZMA_CHECK_BEST,
(const uint8_t *)(sizes), sizeof(sizes));
- return LZMA_OK;
+ return;
}
@@ -152,8 +152,7 @@ lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
return LZMA_PROG_ERROR;
// Update the hash.
- return_if_error(hash_append(&index_hash->blocks,
- unpadded_size, uncompressed_size));
+ hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);
// Validate the properties of *info are still in allowed limits.
if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
@@ -239,9 +238,9 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
index_hash->sequence = SEQ_UNCOMPRESSED;
} else {
// Update the hash.
- return_if_error(hash_append(&index_hash->records,
+ hash_append(&index_hash->records,
index_hash->unpadded_size,
- index_hash->uncompressed_size));
+ index_hash->uncompressed_size);
// Verify that we don't go over the known sizes. Note
// that this validation is simpler than the one used
@@ -313,8 +312,11 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
return LZMA_OK;
if (((index_hash->crc32 >> (index_hash->pos * 8))
- & 0xFF) != in[(*in_pos)++])
+ & 0xFF) != in[(*in_pos)++]) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
return LZMA_DATA_ERROR;
+#endif
+ }
} while (++index_hash->pos < 4);
diff --git a/contrib/libs/lzma/liblzma/common/lzip_decoder.c b/contrib/libs/lzma/liblzma/common/lzip_decoder.c
new file mode 100644
index 0000000000..20794f9466
--- /dev/null
+++ b/contrib/libs/lzma/liblzma/common/lzip_decoder.c
@@ -0,0 +1,414 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lzip_decoder.c
+/// \brief Decodes .lz (lzip) files
+//
+// Author: Michał Górny
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzip_decoder.h"
+#include "lzma_decoder.h"
+#include "check.h"
+
+
+// .lz format version 0 lacks the 64-bit Member size field in the footer.
+#define LZIP_V0_FOOTER_SIZE 12
+#define LZIP_V1_FOOTER_SIZE 20
+#define LZIP_FOOTER_SIZE_MAX LZIP_V1_FOOTER_SIZE
+
+// lc/lp/pb are hardcoded in the .lz format.
+#define LZIP_LC 3
+#define LZIP_LP 0
+#define LZIP_PB 2
+
+
+typedef struct {
+ enum {
+ SEQ_ID_STRING,
+ SEQ_VERSION,
+ SEQ_DICT_SIZE,
+ SEQ_CODER_INIT,
+ SEQ_LZMA_STREAM,
+ SEQ_MEMBER_FOOTER,
+ } sequence;
+
+ /// .lz member format version
+ uint32_t version;
+
+ /// CRC32 of the uncompressed data in the .lz member
+ uint32_t crc32;
+
+ /// Uncompressed size of the .lz member
+ uint64_t uncompressed_size;
+
+ /// Compressed size of the .lz member
+ uint64_t member_size;
+
+ /// Memory usage limit
+ uint64_t memlimit;
+
+ /// Amount of memory actually needed
+ uint64_t memusage;
+
+ /// If true, LZMA_GET_CHECK is returned after decoding the header
+ /// fields. As all files use CRC32 this is redundant but it's
+	/// implemented anyway since the initialization functions support
+ /// all other flags in addition to LZMA_TELL_ANY_CHECK.
+ bool tell_any_check;
+
+ /// If true, we won't calculate or verify the CRC32 of
+ /// the uncompressed data.
+ bool ignore_check;
+
+ /// If true, we will decode concatenated .lz members and stop if
+ /// non-.lz data is seen after at least one member has been
+ /// successfully decoded.
+ bool concatenated;
+
+ /// When decoding concatenated .lz members, this is true as long as
+ /// we are decoding the first .lz member. This is needed to avoid
+ /// incorrect LZMA_FORMAT_ERROR in case there is non-.lz data at
+ /// the end of the file.
+ bool first_member;
+
+ /// Reading position in the header and footer fields
+ size_t pos;
+
+ /// Buffer to hold the .lz footer fields
+ uint8_t buffer[LZIP_FOOTER_SIZE_MAX];
+
+	/// Options decoded from the .lz header that are needed to initialize
+ /// the LZMA1 decoder.
+ lzma_options_lzma options;
+
+ /// LZMA1 decoder
+ lzma_next_coder lzma_decoder;
+
+} lzma_lzip_coder;
+
+
+static lzma_ret
+lzip_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_lzip_coder *coder = coder_ptr;
+
+ while (true)
+ switch (coder->sequence) {
+ case SEQ_ID_STRING: {
+ // The "ID string" or magic bytes are "LZIP" in US-ASCII.
+ const uint8_t lzip_id_string[4] = { 0x4C, 0x5A, 0x49, 0x50 };
+
+ while (coder->pos < sizeof(lzip_id_string)) {
+ if (*in_pos >= in_size) {
+ // If we are on the 2nd+ concatenated member
+ // and the input ends before we can read
+ // the magic bytes, we discard the bytes that
+ // were already read (up to 3) and finish.
+ // See the reasoning below.
+ return !coder->first_member
+ && action == LZMA_FINISH
+ ? LZMA_STREAM_END : LZMA_OK;
+ }
+
+ if (in[*in_pos] != lzip_id_string[coder->pos]) {
+ // The .lz format allows putting non-.lz data
+ // at the end of the file. If we have seen
+ // at least one valid .lz member already,
+ // then we won't consume the byte at *in_pos
+ // and will return LZMA_STREAM_END. This way
+ // apps can easily locate and read the non-.lz
+ // data after the .lz member(s).
+ //
+ // NOTE: If the first 1-3 bytes of the non-.lz
+ // data match the .lz ID string then the first
+ // 1-3 bytes of the junk will get ignored by
+ // us. If apps want to properly locate the
+ // trailing data they must ensure that the
+ // first byte of their custom data isn't the
+ // same as the first byte of .lz ID string.
+ // With the liblzma API we cannot rewind the
+ // input position across calls to lzma_code().
+ return !coder->first_member
+ ? LZMA_STREAM_END : LZMA_FORMAT_ERROR;
+ }
+
+ ++*in_pos;
+ ++coder->pos;
+ }
+
+ coder->pos = 0;
+
+ coder->crc32 = 0;
+ coder->uncompressed_size = 0;
+ coder->member_size = sizeof(lzip_id_string);
+
+ coder->sequence = SEQ_VERSION;
+ }
+
+ // Fall through
+
+ case SEQ_VERSION:
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ coder->version = in[(*in_pos)++];
+
+ // We support version 0 and unextended version 1.
+ if (coder->version > 1)
+ return LZMA_OPTIONS_ERROR;
+
+ ++coder->member_size;
+ coder->sequence = SEQ_DICT_SIZE;
+
+ // .lz versions 0 and 1 use CRC32 as the integrity check
+ // so if the application wanted to know that
+ // (LZMA_TELL_ANY_CHECK) we can tell it now.
+ if (coder->tell_any_check)
+ return LZMA_GET_CHECK;
+
+ // Fall through
+
+ case SEQ_DICT_SIZE: {
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ const uint32_t ds = in[(*in_pos)++];
+ ++coder->member_size;
+
+ // The five lowest bits are for the base-2 logarithm of
+ // the dictionary size and the highest three bits are
+ // the fractional part (0/16 to 7/16) that will be
+	// subtracted to get the final value.
+ //
+ // For example, with 0xB5:
+ // b2log = 21
+ // fracnum = 5
+ // dict_size = 2^21 - 2^21 * 5 / 16 = 1408 KiB
+ const uint32_t b2log = ds & 0x1F;
+ const uint32_t fracnum = ds >> 5;
+
+ // The format versions 0 and 1 allow dictionary size in the
+ // range [4 KiB, 512 MiB].
+ if (b2log < 12 || b2log > 29 || (b2log == 12 && fracnum > 0))
+ return LZMA_DATA_ERROR;
+
+ // 2^[b2log] - 2^[b2log] * [fracnum] / 16
+ // = 2^[b2log] - [fracnum] * 2^([b2log] - 4)
+ coder->options.dict_size = (UINT32_C(1) << b2log)
+ - (fracnum << (b2log - 4));
+
+ assert(coder->options.dict_size >= 4096);
+ assert(coder->options.dict_size <= (UINT32_C(512) << 20));
+
+ coder->options.preset_dict = NULL;
+ coder->options.lc = LZIP_LC;
+ coder->options.lp = LZIP_LP;
+ coder->options.pb = LZIP_PB;
+
+ // Calculate the memory usage.
+ coder->memusage = lzma_lzma_decoder_memusage(&coder->options)
+ + LZMA_MEMUSAGE_BASE;
+
+ // Initialization is a separate step because if we return
+ // LZMA_MEMLIMIT_ERROR we need to be able to restart after
+ // the memlimit has been increased.
+ coder->sequence = SEQ_CODER_INIT;
+ }
+
+ // Fall through
+
+ case SEQ_CODER_INIT: {
+ if (coder->memusage > coder->memlimit)
+ return LZMA_MEMLIMIT_ERROR;
+
+ const lzma_filter_info filters[2] = {
+ {
+ .id = LZMA_FILTER_LZMA1,
+ .init = &lzma_lzma_decoder_init,
+ .options = &coder->options,
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return_if_error(lzma_next_filter_init(&coder->lzma_decoder,
+ allocator, filters));
+
+ coder->crc32 = 0;
+ coder->sequence = SEQ_LZMA_STREAM;
+ }
+
+ // Fall through
+
+ case SEQ_LZMA_STREAM: {
+ const size_t in_start = *in_pos;
+ const size_t out_start = *out_pos;
+
+ const lzma_ret ret = coder->lzma_decoder.code(
+ coder->lzma_decoder.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size,
+ action);
+
+ const size_t out_used = *out_pos - out_start;
+
+ coder->member_size += *in_pos - in_start;
+ coder->uncompressed_size += out_used;
+
+ if (!coder->ignore_check)
+ coder->crc32 = lzma_crc32(out + out_start, out_used,
+ coder->crc32);
+
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ coder->sequence = SEQ_MEMBER_FOOTER;
+ }
+
+ // Fall through
+
+ case SEQ_MEMBER_FOOTER: {
+ // The footer of .lz version 0 lacks the Member size field.
+ // This is the only difference between version 0 and
+ // unextended version 1 formats.
+ const size_t footer_size = coder->version == 0
+ ? LZIP_V0_FOOTER_SIZE
+ : LZIP_V1_FOOTER_SIZE;
+
+ // Copy the CRC32, Data size, and Member size fields to
+ // the internal buffer.
+ lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+ footer_size);
+
+ // Return if we didn't get the whole footer yet.
+ if (coder->pos < footer_size)
+ return LZMA_OK;
+
+ coder->pos = 0;
+ coder->member_size += footer_size;
+
+ // Check that the footer fields match the observed data.
+ if (!coder->ignore_check
+ && coder->crc32 != read32le(&coder->buffer[0]))
+ return LZMA_DATA_ERROR;
+
+ if (coder->uncompressed_size != read64le(&coder->buffer[4]))
+ return LZMA_DATA_ERROR;
+
+ if (coder->version > 0) {
+ // .lz version 0 has no Member size field.
+ if (coder->member_size != read64le(&coder->buffer[12]))
+ return LZMA_DATA_ERROR;
+ }
+
+ // Decoding is finished if we weren't requested to decode
+ // more than one .lz member.
+ if (!coder->concatenated)
+ return LZMA_STREAM_END;
+
+ coder->first_member = false;
+ coder->sequence = SEQ_ID_STRING;
+ break;
+ }
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ // Never reached
+}
+
+
+static void
+lzip_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_lzip_coder *coder = coder_ptr;
+ lzma_next_end(&coder->lzma_decoder, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_check
+lzip_decoder_get_check(const void *coder_ptr lzma_attribute((__unused__)))
+{
+ return LZMA_CHECK_CRC32;
+}
+
+
+static lzma_ret
+lzip_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+ lzma_lzip_coder *coder = coder_ptr;
+
+ *memusage = coder->memusage;
+ *old_memlimit = coder->memlimit;
+
+ if (new_memlimit != 0) {
+ if (new_memlimit < coder->memusage)
+ return LZMA_MEMLIMIT_ERROR;
+
+ coder->memlimit = new_memlimit;
+ }
+
+ return LZMA_OK;
+}
+
+
+extern lzma_ret
+lzma_lzip_decoder_init(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t memlimit, uint32_t flags)
+{
+ lzma_next_coder_init(&lzma_lzip_decoder_init, next, allocator);
+
+ if (flags & ~LZMA_SUPPORTED_FLAGS)
+ return LZMA_OPTIONS_ERROR;
+
+ lzma_lzip_coder *coder = next->coder;
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_lzip_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &lzip_decode;
+ next->end = &lzip_decoder_end;
+ next->get_check = &lzip_decoder_get_check;
+ next->memconfig = &lzip_decoder_memconfig;
+
+ coder->lzma_decoder = LZMA_NEXT_CODER_INIT;
+ }
+
+ coder->sequence = SEQ_ID_STRING;
+ coder->memlimit = my_max(1, memlimit);
+ coder->memusage = LZMA_MEMUSAGE_BASE;
+ coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
+ coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
+ coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
+ coder->first_member = true;
+ coder->pos = 0;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_lzip_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
+{
+ lzma_next_strm_init(lzma_lzip_decoder_init, strm, memlimit, flags);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
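
A standalone check of the dictionary-size arithmetic from SEQ_DICT_SIZE above, using the 0xB5 example value from the comment; this snippet is only illustrative:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		const uint8_t ds = 0xB5;

		const uint32_t b2log = ds & 0x1F;	// 21
		const uint32_t fracnum = ds >> 5;	// 5

		// 2^21 - 5 * 2^(21 - 4) = 2097152 - 655360 = 1441792 bytes
		const uint32_t dict_size = (UINT32_C(1) << b2log)
				- (fracnum << (b2log - 4));

		printf("%u KiB\n", dict_size >> 10);	// prints "1408 KiB"
		return 0;
	}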
diff --git a/contrib/libs/lzma/liblzma/common/lzip_decoder.h b/contrib/libs/lzma/liblzma/common/lzip_decoder.h
new file mode 100644
index 0000000000..33a01c352c
--- /dev/null
+++ b/contrib/libs/lzma/liblzma/common/lzip_decoder.h
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lzip_decoder.h
+/// \brief Decodes .lz (lzip) files
+//
+// Author: Michał Górny
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_LZIP_DECODER_H
+#define LZMA_LZIP_DECODER_H
+
+#include "common.h"
+
+extern lzma_ret lzma_lzip_decoder_init(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t memlimit, uint32_t flags);
+
+#endif
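
For completeness, a minimal sketch of initializing the new .lz decoder through the public API, assuming no effective memory limit and concatenated-member handling:

	#include <stdint.h>
	#include <lzma.h>

	static lzma_ret
	init_lz_decoder(lzma_stream *strm)
	{
		// LZMA_CONCATENATED decodes all consecutive .lz members and
		// stops cleanly at possible trailing non-.lz data.
		return lzma_lzip_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
	}

After initialization the usual lzma_code() loop applies; LZMA_FORMAT_ERROR is possible only before the first member has been decoded successfully.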
diff --git a/contrib/libs/lzma/liblzma/common/memcmplen.h b/contrib/libs/lzma/liblzma/common/memcmplen.h
index dcfd8d6f89..5a481a02c9 100644
--- a/contrib/libs/lzma/liblzma/common/memcmplen.h
+++ b/contrib/libs/lzma/liblzma/common/memcmplen.h
@@ -51,10 +51,6 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
|| (defined(__INTEL_COMPILER) && defined(__x86_64__)) \
|| (defined(__INTEL_COMPILER) && defined(_M_X64)) \
|| (defined(_MSC_VER) && defined(_M_X64)))
- // NOTE: This will use 64-bit unaligned access which
- // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit, but
- // it's convenient here at least as long as it's x86-64 only.
- //
// I keep this x86-64 only for now since that's where I know this
// to be a good method. This may be fine on other 64-bit CPUs too.
// On big endian one should use xor instead of subtraction and switch
@@ -80,12 +76,12 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
&& defined(HAVE__MM_MOVEMASK_EPI8) \
- && ((defined(__GNUC__) && defined(__SSE2_MATH__)) \
- || (defined(__INTEL_COMPILER) && defined(__SSE2__)) \
+ && (defined(__SSE2__) \
|| (defined(_MSC_VER) && defined(_M_IX86_FP) \
&& _M_IX86_FP >= 2))
- // NOTE: Like above, this will use 128-bit unaligned access which
- // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit.
+ // NOTE: This will use 128-bit unaligned access which
+ // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit,
+ // but it's convenient here since this is x86-only.
//
// SSE2 version for 32-bit and 64-bit x86. On x86-64 the above
// version is sometimes significantly faster and sometimes
diff --git a/contrib/libs/lzma/liblzma/common/microlzma_decoder.c b/contrib/libs/lzma/liblzma/common/microlzma_decoder.c
new file mode 100644
index 0000000000..e473373daa
--- /dev/null
+++ b/contrib/libs/lzma/liblzma/common/microlzma_decoder.c
@@ -0,0 +1,221 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file microlzma_decoder.c
+/// \brief Decode MicroLZMA format
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_decoder.h"
+#include "lz_decoder.h"
+
+
+typedef struct {
+ /// LZMA1 decoder
+ lzma_next_coder lzma;
+
+ /// Compressed size of the stream as given by the application.
+ /// This must be exactly correct.
+ ///
+ /// This will be decremented when input is read.
+ uint64_t comp_size;
+
+ /// Uncompressed size of the stream as given by the application.
+ /// This may be less than the actual uncompressed size if
+ /// uncomp_size_is_exact is false.
+ ///
+ /// This will be decremented when output is produced.
+ lzma_vli uncomp_size;
+
+ /// LZMA dictionary size as given by the application
+ uint32_t dict_size;
+
+ /// If true, the exact uncompressed size is known. If false,
+ /// uncomp_size may be smaller than the real uncompressed size;
+ /// uncomp_size may never be bigger than the real uncompressed size.
+ bool uncomp_size_is_exact;
+
+ /// True once the first byte of the MicroLZMA stream
+ /// has been processed.
+ bool props_decoded;
+} lzma_microlzma_coder;
+
+
+static lzma_ret
+microlzma_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_microlzma_coder *coder = coder_ptr;
+
+ // Remember the in start position so that we can update comp_size.
+ const size_t in_start = *in_pos;
+
+ // Remember the out start position so that we can update uncomp_size.
+ const size_t out_start = *out_pos;
+
+ // Limit the amount of input so that the decoder won't read more than
+ // comp_size. This is required when uncomp_size isn't exact because
+ // in that case the LZMA decoder will try to decode more input even
+ // when it has no output space (it can be looking for EOPM).
+ if (in_size - *in_pos > coder->comp_size)
+ in_size = *in_pos + (size_t)(coder->comp_size);
+
+ // When the exact uncompressed size isn't known, we must limit
+ // the available output space to prevent the LZMA decoder from
+ // trying to decode too much.
+ if (!coder->uncomp_size_is_exact
+ && out_size - *out_pos > coder->uncomp_size)
+ out_size = *out_pos + (size_t)(coder->uncomp_size);
+
+ if (!coder->props_decoded) {
+ // There must be at least one byte of input to decode
+ // the properties byte.
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ lzma_options_lzma options = {
+ .dict_size = coder->dict_size,
+ .preset_dict = NULL,
+ .preset_dict_size = 0,
+ .ext_flags = 0, // EOPM not allowed when size is known
+ .ext_size_low = UINT32_MAX, // Unknown size by default
+ .ext_size_high = UINT32_MAX,
+ };
+
+ if (coder->uncomp_size_is_exact)
+ lzma_set_ext_size(options, coder->uncomp_size);
+
+ // The properties are stored as bitwise-negation
+ // of the typical encoding.
+ if (lzma_lzma_lclppb_decode(&options, ~in[*in_pos]))
+ return LZMA_OPTIONS_ERROR;
+
+ ++*in_pos;
+
+ // Initialize the decoder.
+ lzma_filter_info filters[2] = {
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .init = &lzma_lzma_decoder_init,
+ .options = &options,
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return_if_error(lzma_next_filter_init(&coder->lzma,
+ allocator, filters));
+
+ // Pass one dummy 0x00 byte to the LZMA decoder since that
+ // is what it expects the first byte to be.
+ const uint8_t dummy_in = 0;
+ size_t dummy_in_pos = 0;
+ if (coder->lzma.code(coder->lzma.coder, allocator,
+ &dummy_in, &dummy_in_pos, 1,
+ out, out_pos, out_size, LZMA_RUN) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ assert(dummy_in_pos == 1);
+ coder->props_decoded = true;
+ }
+
+ // The rest is normal LZMA decoding.
+ lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
+ in, in_pos, in_size,
+ out, out_pos, out_size, action);
+
+ // Update the remaining compressed size.
+ assert(coder->comp_size >= *in_pos - in_start);
+ coder->comp_size -= *in_pos - in_start;
+
+ if (coder->uncomp_size_is_exact) {
+ // After successful decompression of the complete stream
+ // the compressed size must match.
+ if (ret == LZMA_STREAM_END && coder->comp_size != 0)
+ ret = LZMA_DATA_ERROR;
+ } else {
+ // Update the amount of output remaining.
+ assert(coder->uncomp_size >= *out_pos - out_start);
+ coder->uncomp_size -= *out_pos - out_start;
+
+ // - We must not get LZMA_STREAM_END because the stream
+ // shouldn't have EOPM.
+ // - We must use uncomp_size to determine when to
+ // return LZMA_STREAM_END.
+ if (ret == LZMA_STREAM_END)
+ ret = LZMA_DATA_ERROR;
+ else if (coder->uncomp_size == 0)
+ ret = LZMA_STREAM_END;
+ }
+
+ return ret;
+}
+
+
+static void
+microlzma_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_microlzma_coder *coder = coder_ptr;
+ lzma_next_end(&coder->lzma, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+microlzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t comp_size,
+ uint64_t uncomp_size, bool uncomp_size_is_exact,
+ uint32_t dict_size)
+{
+ lzma_next_coder_init(&microlzma_decoder_init, next, allocator);
+
+ lzma_microlzma_coder *coder = next->coder;
+
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_microlzma_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &microlzma_decode;
+ next->end = &microlzma_decoder_end;
+
+ coder->lzma = LZMA_NEXT_CODER_INIT;
+ }
+
+ // The public API is uint64_t but the internal LZ decoder API uses
+ // lzma_vli.
+ if (uncomp_size > LZMA_VLI_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ coder->comp_size = comp_size;
+ coder->uncomp_size = uncomp_size;
+ coder->uncomp_size_is_exact = uncomp_size_is_exact;
+ coder->dict_size = dict_size;
+
+ coder->props_decoded = false;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_microlzma_decoder(lzma_stream *strm, uint64_t comp_size,
+ uint64_t uncomp_size, lzma_bool uncomp_size_is_exact,
+ uint32_t dict_size)
+{
+ lzma_next_strm_init(microlzma_decoder_init, strm, comp_size,
+ uncomp_size, uncomp_size_is_exact, dict_size);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
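
A buffer-to-buffer sketch of the MicroLZMA decoder for the case where the caller knows both the exact compressed and uncompressed sizes; dict_size must match what the encoder used, and the wrapper name is illustrative:

	#include <stddef.h>
	#include <stdint.h>
	#include <lzma.h>

	static lzma_ret
	microlzma_uncompress(const uint8_t *in, size_t comp_size,
			uint8_t *out, size_t uncomp_size, uint32_t dict_size)
	{
		lzma_stream strm = LZMA_STREAM_INIT;

		lzma_ret ret = lzma_microlzma_decoder(&strm, comp_size,
				uncomp_size, /* uncomp_size_is_exact */ 1,
				dict_size);
		if (ret != LZMA_OK)
			return ret;

		strm.next_in = in;
		strm.avail_in = comp_size;
		strm.next_out = out;
		strm.avail_out = uncomp_size;

		// LZMA_STREAM_END means exactly comp_size input bytes were
		// consumed and uncomp_size output bytes were produced.
		ret = lzma_code(&strm, LZMA_FINISH);
		lzma_end(&strm);
		return ret;
	}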
diff --git a/contrib/libs/lzma/liblzma/common/microlzma_encoder.c b/contrib/libs/lzma/liblzma/common/microlzma_encoder.c
new file mode 100644
index 0000000000..d3ef0632dd
--- /dev/null
+++ b/contrib/libs/lzma/liblzma/common/microlzma_encoder.c
@@ -0,0 +1,140 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file microlzma_encoder.c
+/// \brief Encode into MicroLZMA format
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_encoder.h"
+
+
+typedef struct {
+ /// LZMA1 encoder
+ lzma_next_coder lzma;
+
+ /// LZMA properties byte (lc/lp/pb)
+ uint8_t props;
+} lzma_microlzma_coder;
+
+
+static lzma_ret
+microlzma_encode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_microlzma_coder *coder = coder_ptr;
+
+ // Remember *out_pos so that we can overwrite the first byte with
+ // the LZMA properties byte.
+ const size_t out_start = *out_pos;
+
+ // Remember *in_pos so that we can set it based on how many
+ // uncompressed bytes were actually encoded.
+ const size_t in_start = *in_pos;
+
+ // Set the output size limit based on the available output space.
+ // We know that the encoder supports set_out_limit() so
+ // LZMA_OPTIONS_ERROR isn't possible. LZMA_BUF_ERROR is possible
+ // but lzma_code() has an assertion to not allow it to be returned
+ // from here and I don't want to change that for now, so
+ // LZMA_BUF_ERROR becomes LZMA_PROG_ERROR.
+ uint64_t uncomp_size;
+ if (coder->lzma.set_out_limit(coder->lzma.coder,
+ &uncomp_size, out_size - *out_pos) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ // set_out_limit fails if this isn't true.
+ assert(out_size - *out_pos >= 6);
+
+ // Encode as much as possible.
+ const lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size, action);
+
+ if (ret != LZMA_STREAM_END) {
+ if (ret == LZMA_OK) {
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ return ret;
+ }
+
+ // The first output byte is bitwise-negation of the properties byte.
+ // We know that there is space for this byte because set_out_limit
+ // and the actual encoding succeeded.
+ out[out_start] = (uint8_t)(~coder->props);
+
+ // The LZMA encoder likely read more input than it was able to encode.
+ // Set *in_pos based on uncomp_size.
+ assert(uncomp_size <= in_size - in_start);
+ *in_pos = in_start + (size_t)(uncomp_size);
+
+ return ret;
+}
+
+
+static void
+microlzma_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_microlzma_coder *coder = coder_ptr;
+ lzma_next_end(&coder->lzma, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+microlzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_options_lzma *options)
+{
+ lzma_next_coder_init(&microlzma_encoder_init, next, allocator);
+
+ lzma_microlzma_coder *coder = next->coder;
+
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_microlzma_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &microlzma_encode;
+ next->end = &microlzma_encoder_end;
+
+ coder->lzma = LZMA_NEXT_CODER_INIT;
+ }
+
+ // Encode the properties byte. Bitwise-negation of it will be the
+ // first output byte.
+ return_if_error(lzma_lzma_lclppb_encode(options, &coder->props));
+
+ // Initialize the LZMA encoder.
+ const lzma_filter_info filters[2] = {
+ {
+ .id = LZMA_FILTER_LZMA1,
+ .init = &lzma_lzma_encoder_init,
+ .options = (void *)(options),
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return lzma_next_filter_init(&coder->lzma, allocator, filters);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_microlzma_encoder(lzma_stream *strm, const lzma_options_lzma *options)
+{
+ lzma_next_strm_init(microlzma_encoder_init, strm, options);
+
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+
+}
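
The matching one-shot encoder side, assuming the whole input fits into the provided output buffer; preset level LZMA_PRESET_DEFAULT is an arbitrary choice for this sketch:

	#include <stddef.h>
	#include <stdint.h>
	#include <lzma.h>

	static lzma_ret
	microlzma_compress(const uint8_t *in, size_t in_size,
			uint8_t *out, size_t out_size, size_t *out_used)
	{
		lzma_options_lzma opt;
		if (lzma_lzma_preset(&opt, LZMA_PRESET_DEFAULT))
			return LZMA_OPTIONS_ERROR;

		lzma_stream strm = LZMA_STREAM_INIT;
		lzma_ret ret = lzma_microlzma_encoder(&strm, &opt);
		if (ret != LZMA_OK)
			return ret;

		strm.next_in = in;
		strm.avail_in = in_size;
		strm.next_out = out;
		strm.avail_out = out_size;

		// Only LZMA_FINISH is supported; LZMA_STREAM_END indicates
		// success. If the output buffer is too small, the encoder
		// stops early and strm.total_in tells how much of the input
		// was actually encoded.
		ret = lzma_code(&strm, LZMA_FINISH);
		*out_used = (size_t)strm.total_out;
		lzma_end(&strm);
		return ret;
	}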
diff --git a/contrib/libs/lzma/liblzma/common/outqueue.c b/contrib/libs/lzma/liblzma/common/outqueue.c
index 2dc8a38d1b..71e8648a29 100644
--- a/contrib/libs/lzma/liblzma/common/outqueue.c
+++ b/contrib/libs/lzma/liblzma/common/outqueue.c
@@ -13,84 +13,121 @@
#include "outqueue.h"
-/// This is to ease integer overflow checking: We may allocate up to
-/// 2 * LZMA_THREADS_MAX buffers and we need some extra memory for other
-/// data structures (that's the second /2).
-#define BUF_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX / 2 / 2)
+/// Get the maximum number of buffers that may be allocated based
+/// on the number of threads. For now this is twice the number of threads.
+/// It's a compromise between RAM usage and keeping the worker threads busy
+/// when buffers finish out of order.
+#define GET_BUFS_LIMIT(threads) (2 * (threads))
-static lzma_ret
-get_options(uint64_t *bufs_alloc_size, uint32_t *bufs_count,
- uint64_t buf_size_max, uint32_t threads)
+extern uint64_t
+lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads)
{
- if (threads > LZMA_THREADS_MAX || buf_size_max > BUF_SIZE_MAX)
- return LZMA_OPTIONS_ERROR;
-
- // The number of buffers is twice the number of threads.
- // This wastes RAM but keeps the threads busy when buffers
- // finish out of order.
+ // This is to ease integer overflow checking: We may allocate up to
+ // GET_BUFS_LIMIT(LZMA_THREADS_MAX) buffers and we need some extra
+ // memory for other data structures too (that's the /2).
//
- // NOTE: If this is changed, update BUF_SIZE_MAX too.
- *bufs_count = threads * 2;
- *bufs_alloc_size = *bufs_count * buf_size_max;
+ // lzma_outq_prealloc_buf() will still accept bigger buffers than this.
+ const uint64_t limit
+ = UINT64_MAX / GET_BUFS_LIMIT(LZMA_THREADS_MAX) / 2;
- return LZMA_OK;
+ if (threads > LZMA_THREADS_MAX || buf_size_max > limit)
+ return UINT64_MAX;
+
+ return GET_BUFS_LIMIT(threads)
+ * lzma_outq_outbuf_memusage(buf_size_max);
}
-extern uint64_t
-lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads)
+static void
+move_head_to_cache(lzma_outq *outq, const lzma_allocator *allocator)
{
- uint64_t bufs_alloc_size;
- uint32_t bufs_count;
+ assert(outq->head != NULL);
+ assert(outq->tail != NULL);
+ assert(outq->bufs_in_use > 0);
- if (get_options(&bufs_alloc_size, &bufs_count, buf_size_max, threads)
- != LZMA_OK)
- return UINT64_MAX;
+ lzma_outbuf *buf = outq->head;
+ outq->head = buf->next;
+ if (outq->head == NULL)
+ outq->tail = NULL;
- return sizeof(lzma_outq) + bufs_count * sizeof(lzma_outbuf)
- + bufs_alloc_size;
+ if (outq->cache != NULL && outq->cache->allocated != buf->allocated)
+ lzma_outq_clear_cache(outq, allocator);
+
+ buf->next = outq->cache;
+ outq->cache = buf;
+
+ --outq->bufs_in_use;
+ outq->mem_in_use -= lzma_outq_outbuf_memusage(buf->allocated);
+
+ return;
+}
+
+
+static void
+free_one_cached_buffer(lzma_outq *outq, const lzma_allocator *allocator)
+{
+ assert(outq->cache != NULL);
+
+ lzma_outbuf *buf = outq->cache;
+ outq->cache = buf->next;
+
+ --outq->bufs_allocated;
+ outq->mem_allocated -= lzma_outq_outbuf_memusage(buf->allocated);
+
+ lzma_free(buf, allocator);
+ return;
+}
+
+
+extern void
+lzma_outq_clear_cache(lzma_outq *outq, const lzma_allocator *allocator)
+{
+ while (outq->cache != NULL)
+ free_one_cached_buffer(outq, allocator);
+
+ return;
+}
+
+
+extern void
+lzma_outq_clear_cache2(lzma_outq *outq, const lzma_allocator *allocator,
+ size_t keep_size)
+{
+ if (outq->cache == NULL)
+ return;
+
+ // Free all but one.
+ while (outq->cache->next != NULL)
+ free_one_cached_buffer(outq, allocator);
+
+	// Free the last one only if its size doesn't equal keep_size.
+ if (outq->cache->allocated != keep_size)
+ free_one_cached_buffer(outq, allocator);
+
+ return;
}
extern lzma_ret
lzma_outq_init(lzma_outq *outq, const lzma_allocator *allocator,
- uint64_t buf_size_max, uint32_t threads)
+ uint32_t threads)
{
- uint64_t bufs_alloc_size;
- uint32_t bufs_count;
-
- // Set bufs_count and bufs_alloc_size.
- return_if_error(get_options(&bufs_alloc_size, &bufs_count,
- buf_size_max, threads));
-
- // Allocate memory if needed.
- if (outq->buf_size_max != buf_size_max
- || outq->bufs_allocated != bufs_count) {
- lzma_outq_end(outq, allocator);
-
-#if SIZE_MAX < UINT64_MAX
- if (bufs_alloc_size > SIZE_MAX)
- return LZMA_MEM_ERROR;
-#endif
-
- outq->bufs = lzma_alloc(bufs_count * sizeof(lzma_outbuf),
- allocator);
- outq->bufs_mem = lzma_alloc((size_t)(bufs_alloc_size),
- allocator);
-
- if (outq->bufs == NULL || outq->bufs_mem == NULL) {
- lzma_outq_end(outq, allocator);
- return LZMA_MEM_ERROR;
- }
- }
+ if (threads > LZMA_THREADS_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ const uint32_t bufs_limit = GET_BUFS_LIMIT(threads);
+
+ // Clear head/tail.
+ while (outq->head != NULL)
+ move_head_to_cache(outq, allocator);
- // Initialize the rest of the main structure. Initialization of
- // outq->bufs[] is done when they are actually needed.
- outq->buf_size_max = (size_t)(buf_size_max);
- outq->bufs_allocated = bufs_count;
- outq->bufs_pos = 0;
- outq->bufs_used = 0;
+ // If new buf_limit is lower than the old one, we may need to free
+ // a few cached buffers.
+ while (bufs_limit < outq->bufs_allocated)
+ free_one_cached_buffer(outq, allocator);
+
+ outq->bufs_limit = bufs_limit;
outq->read_pos = 0;
return LZMA_OK;
@@ -100,33 +137,81 @@ lzma_outq_init(lzma_outq *outq, const lzma_allocator *allocator,
extern void
lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator)
{
- lzma_free(outq->bufs, allocator);
- outq->bufs = NULL;
-
- lzma_free(outq->bufs_mem, allocator);
- outq->bufs_mem = NULL;
+ while (outq->head != NULL)
+ move_head_to_cache(outq, allocator);
+ lzma_outq_clear_cache(outq, allocator);
return;
}
-extern lzma_outbuf *
-lzma_outq_get_buf(lzma_outq *outq)
+extern lzma_ret
+lzma_outq_prealloc_buf(lzma_outq *outq, const lzma_allocator *allocator,
+ size_t size)
{
// Caller must have checked it with lzma_outq_has_buf().
- assert(outq->bufs_used < outq->bufs_allocated);
+ assert(outq->bufs_in_use < outq->bufs_limit);
+
+	// If there already is an appropriately-sized buffer in the cache,
+ // we need to do nothing.
+ if (outq->cache != NULL && outq->cache->allocated == size)
+ return LZMA_OK;
+
+ if (size > SIZE_MAX - sizeof(lzma_outbuf))
+ return LZMA_MEM_ERROR;
- // Initialize the new buffer.
- lzma_outbuf *buf = &outq->bufs[outq->bufs_pos];
- buf->buf = outq->bufs_mem + outq->bufs_pos * outq->buf_size_max;
- buf->size = 0;
+ const size_t alloc_size = lzma_outq_outbuf_memusage(size);
+
+ // The cache may have buffers but their size is wrong.
+ lzma_outq_clear_cache(outq, allocator);
+
+ outq->cache = lzma_alloc(alloc_size, allocator);
+ if (outq->cache == NULL)
+ return LZMA_MEM_ERROR;
+
+ outq->cache->next = NULL;
+ outq->cache->allocated = size;
+
+ ++outq->bufs_allocated;
+ outq->mem_allocated += alloc_size;
+
+ return LZMA_OK;
+}
+
+
+extern lzma_outbuf *
+lzma_outq_get_buf(lzma_outq *outq, void *worker)
+{
+ // Caller must have used lzma_outq_prealloc_buf() to ensure these.
+ assert(outq->bufs_in_use < outq->bufs_limit);
+ assert(outq->bufs_in_use < outq->bufs_allocated);
+ assert(outq->cache != NULL);
+
+ lzma_outbuf *buf = outq->cache;
+ outq->cache = buf->next;
+ buf->next = NULL;
+
+ if (outq->tail != NULL) {
+ assert(outq->head != NULL);
+ outq->tail->next = buf;
+ } else {
+ assert(outq->head == NULL);
+ outq->head = buf;
+ }
+
+ outq->tail = buf;
+
+ buf->worker = worker;
buf->finished = false;
+ buf->finish_ret = LZMA_STREAM_END;
+ buf->pos = 0;
+ buf->decoder_in_pos = 0;
- // Update the queue state.
- if (++outq->bufs_pos == outq->bufs_allocated)
- outq->bufs_pos = 0;
+ buf->unpadded_size = 0;
+ buf->uncompressed_size = 0;
- ++outq->bufs_used;
+ ++outq->bufs_in_use;
+ outq->mem_in_use += lzma_outq_outbuf_memusage(buf->allocated);
return buf;
}
@@ -135,50 +220,68 @@ lzma_outq_get_buf(lzma_outq *outq)
extern bool
lzma_outq_is_readable(const lzma_outq *outq)
{
- uint32_t i = outq->bufs_pos - outq->bufs_used;
- if (outq->bufs_pos < outq->bufs_used)
- i += outq->bufs_allocated;
+ if (outq->head == NULL)
+ return false;
- return outq->bufs[i].finished;
+ return outq->read_pos < outq->head->pos || outq->head->finished;
}
extern lzma_ret
-lzma_outq_read(lzma_outq *restrict outq, uint8_t *restrict out,
- size_t *restrict out_pos, size_t out_size,
+lzma_outq_read(lzma_outq *restrict outq,
+ const lzma_allocator *restrict allocator,
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size,
lzma_vli *restrict unpadded_size,
lzma_vli *restrict uncompressed_size)
{
// There must be at least one buffer from which to read.
- if (outq->bufs_used == 0)
+ if (outq->bufs_in_use == 0)
return LZMA_OK;
// Get the buffer.
- uint32_t i = outq->bufs_pos - outq->bufs_used;
- if (outq->bufs_pos < outq->bufs_used)
- i += outq->bufs_allocated;
-
- lzma_outbuf *buf = &outq->bufs[i];
-
- // If it isn't finished yet, we cannot read from it.
- if (!buf->finished)
- return LZMA_OK;
+ lzma_outbuf *buf = outq->head;
// Copy from the buffer to output.
- lzma_bufcpy(buf->buf, &outq->read_pos, buf->size,
+ //
+	// FIXME? In the threaded decoder it may be bad to do this copy while
+ // the mutex is being held.
+ lzma_bufcpy(buf->buf, &outq->read_pos, buf->pos,
out, out_pos, out_size);
// Return if we didn't get all the data from the buffer.
- if (outq->read_pos < buf->size)
+ if (!buf->finished || outq->read_pos < buf->pos)
return LZMA_OK;
// The buffer was finished. Tell the caller its size information.
- *unpadded_size = buf->unpadded_size;
- *uncompressed_size = buf->uncompressed_size;
+ if (unpadded_size != NULL)
+ *unpadded_size = buf->unpadded_size;
+
+ if (uncompressed_size != NULL)
+ *uncompressed_size = buf->uncompressed_size;
+
+ // Remember the return value.
+ const lzma_ret finish_ret = buf->finish_ret;
// Free this buffer for further use.
- --outq->bufs_used;
+ move_head_to_cache(outq, allocator);
outq->read_pos = 0;
- return LZMA_STREAM_END;
+ return finish_ret;
+}
+
+
+extern void
+lzma_outq_enable_partial_output(lzma_outq *outq,
+ void (*enable_partial_output)(void *worker))
+{
+ if (outq->head != NULL && !outq->head->finished
+ && outq->head->worker != NULL) {
+ enable_partial_output(outq->head->worker);
+
+ // Set it to NULL since calling it twice is pointless.
+ outq->head->worker = NULL;
+ }
+
+ return;
}
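
The rewritten queue separates allocation from use: lzma_outq_prealloc_buf() puts a correctly sized buffer into the cache, lzma_outq_get_buf() then cannot fail, and lzma_outq_read() recycles finished buffers back into the cache. The following is only a minimal sketch of that flow from an owning coder's point of view, not code from this patch; locking and the worker hand-off are omitted, and it assumes the "outqueue.h" declarations below are in scope.

static lzma_ret
outq_flow_sketch(lzma_outq *outq, const lzma_allocator *allocator,
		size_t block_out_size, uint8_t *out, size_t *out_pos,
		size_t out_size)
{
	// A free slot must exist before taking a new buffer into use.
	if (!lzma_outq_has_buf(outq))
		return LZMA_PROG_ERROR;

	// Make sure the cache holds a buffer of the right size. After
	// this succeeds, lzma_outq_get_buf() cannot fail.
	const lzma_ret ret = lzma_outq_prealloc_buf(
			outq, allocator, block_out_size);
	if (ret != LZMA_OK)
		return ret;

	// Append the cached buffer to the tail of the queue. The second
	// argument would normally point to the worker thread so that
	// lzma_outq_enable_partial_output() can reach it later.
	lzma_outbuf *buf = lzma_outq_get_buf(outq, NULL);

	// A worker would now fill buf->buf[], advance buf->pos, and set
	// buf->finished (plus buf->finish_ret) under the coder's mutex.
	(void)buf;

	// The main thread drains the head buffer; LZMA_STREAM_END means
	// a finished buffer was fully read and moved back to the cache.
	return lzma_outq_read(outq, allocator, out, out_pos, out_size,
			NULL, NULL);
}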
diff --git a/contrib/libs/lzma/liblzma/common/outqueue.h b/contrib/libs/lzma/liblzma/common/outqueue.h
index 079634de45..596911e95e 100644
--- a/contrib/libs/lzma/liblzma/common/outqueue.h
+++ b/contrib/libs/lzma/liblzma/common/outqueue.h
@@ -14,16 +14,36 @@
/// Output buffer for a single thread
-typedef struct {
- /// Pointer to the output buffer of lzma_outq.buf_size_max bytes
- uint8_t *buf;
-
- /// Amount of data written to buf
- size_t size;
+typedef struct lzma_outbuf_s lzma_outbuf;
+struct lzma_outbuf_s {
+ /// Pointer to the next buffer. This is used for the cached buffers.
+ /// The worker thread must not modify this.
+ lzma_outbuf *next;
+
+	/// This is initialized by lzma_outq_get_buf() and
+ /// is used by lzma_outq_enable_partial_output().
+ /// The worker thread must not modify this.
+ void *worker;
+
+ /// Amount of memory allocated for buf[].
+ /// The worker thread must not modify this.
+ size_t allocated;
+
+ /// Writing position in the worker thread or, in other words, the
+ /// amount of finished data written to buf[] which can be copied out
+ ///
+ /// \note This is read by another thread and thus access
+ /// to this variable needs a mutex.
+ size_t pos;
- /// Additional size information
- lzma_vli unpadded_size;
- lzma_vli uncompressed_size;
+ /// Decompression: Position in the input buffer in the worker thread
+ /// that matches the output "pos" above. This is used to detect if
+ /// more output might be possible from the worker thread: if it has
+ /// consumed all its input, then more output isn't possible.
+ ///
+ /// \note This is read by another thread and thus access
+ /// to this variable needs a mutex.
+ size_t decoder_in_pos;
/// True when no more data will be written into this buffer.
///
@@ -31,32 +51,55 @@ typedef struct {
/// to this variable needs a mutex.
bool finished;
-} lzma_outbuf;
+ /// Return value for lzma_outq_read() when the last byte from
+ /// a finished buffer has been read. Defaults to LZMA_STREAM_END.
+ /// This must *not* be LZMA_OK. The idea is to allow a decoder to
+ /// pass an error code to the main thread, setting the code here
+ /// together with finished = true.
+ lzma_ret finish_ret;
+
+ /// Additional size information. lzma_outq_read() may read these
+ /// when "finished" is true.
+ lzma_vli unpadded_size;
+ lzma_vli uncompressed_size;
+
+ /// Buffer of "allocated" bytes
+ uint8_t buf[];
+};
typedef struct {
- /// Array of buffers that are used cyclically.
- lzma_outbuf *bufs;
+ /// Linked list of buffers in use. The next output byte will be
+ /// read from the head and buffers for the next thread will be
+ /// appended to the tail. tail->next is always NULL.
+ lzma_outbuf *head;
+ lzma_outbuf *tail;
- /// Memory allocated for all the buffers
- uint8_t *bufs_mem;
+ /// Number of bytes read from head->buf[] in lzma_outq_read()
+ size_t read_pos;
- /// Amount of buffer space available in each buffer
- size_t buf_size_max;
+ /// Linked list of allocated buffers that aren't currently used.
+ /// This way buffers of similar size can be reused and don't
+ /// need to be reallocated every time. For simplicity, all
+ /// cached buffers in the list have the same allocated size.
+ lzma_outbuf *cache;
- /// Number of buffers allocated
- uint32_t bufs_allocated;
+ /// Total amount of memory allocated for buffers
+ uint64_t mem_allocated;
- /// Position in the bufs array. The next buffer to be taken
- /// into use is bufs[bufs_pos].
- uint32_t bufs_pos;
+ /// Amount of memory used by the buffers that are in use in
+ /// the head...tail linked list.
+ uint64_t mem_in_use;
- /// Number of buffers in use
- uint32_t bufs_used;
+ /// Number of buffers in use in the head...tail list. If and only if
+ /// this is zero, the pointers head and tail above are NULL.
+ uint32_t bufs_in_use;
- /// Position in the buffer in lzma_outq_read()
- size_t read_pos;
+ /// Number of buffers allocated (in use + cached)
+ uint32_t bufs_allocated;
+ /// Maximum allowed number of allocated buffers
+ uint32_t bufs_limit;
} lzma_outq;
@@ -76,32 +119,60 @@ extern uint64_t lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads);
/// function knows that there are no previous
/// allocations to free.
/// \param allocator Pointer to allocator or NULL
-/// \param buf_size_max Maximum amount of data that a single buffer
-/// in the queue may need to store.
/// \param threads Number of buffers that may be in use
/// concurrently. Note that more than this number
-/// of buffers will actually get allocated to
+/// of buffers may actually get allocated to
/// improve performance when buffers finish
-/// out of order.
+/// out of order. The actual maximum number of
+/// allocated buffers is derived from the number
+/// of threads.
///
/// \return - LZMA_OK
/// - LZMA_MEM_ERROR
///
-extern lzma_ret lzma_outq_init(
- lzma_outq *outq, const lzma_allocator *allocator,
- uint64_t buf_size_max, uint32_t threads);
+extern lzma_ret lzma_outq_init(lzma_outq *outq,
+ const lzma_allocator *allocator, uint32_t threads);
/// \brief Free the memory associated with the output queue
extern void lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator);
+/// \brief Free all cached buffers that consume memory but aren't in use
+extern void lzma_outq_clear_cache(
+ lzma_outq *outq, const lzma_allocator *allocator);
+
+
+/// \brief Like lzma_outq_clear_cache() but might keep one buffer
+///
+/// One buffer is not freed if its size is equal to keep_size.
+/// This is useful if the caller knows that it will soon need a buffer of
+/// keep_size bytes. This way it won't be freed and immediately reallocated.
+extern void lzma_outq_clear_cache2(
+ lzma_outq *outq, const lzma_allocator *allocator,
+ size_t keep_size);
+
+
+/// \brief Preallocate a new buffer into cache
+///
+/// Splitting the buffer allocation into a separate function makes it
+/// possible to ensure that lzma_outq_get_buf() cannot fail.
+/// If the preallocated buffer isn't actually used (for example, because
+/// some other error occurs), the caller doesn't need to do anything as
+/// the buffer will be used later or cleared from the cache when it is
+/// no longer needed.
+///
+/// \return LZMA_OK on success, LZMA_MEM_ERROR if allocation fails
+///
+extern lzma_ret lzma_outq_prealloc_buf(
+ lzma_outq *outq, const lzma_allocator *allocator, size_t size);
+
+
/// \brief Get a new buffer
///
-/// lzma_outq_has_buf() must be used to check that there is a buffer
+/// lzma_outq_prealloc_buf() must be used to ensure that there is a buffer
/// available before calling lzma_outq_get_buf().
///
-extern lzma_outbuf *lzma_outq_get_buf(lzma_outq *outq);
+extern lzma_outbuf *lzma_outq_get_buf(lzma_outq *outq, void *worker);
/// \brief Test if there is data ready to be read
@@ -126,17 +197,32 @@ extern bool lzma_outq_is_readable(const lzma_outq *outq);
 /// \return - LZMA_OK: All OK. Either no data was available or the buffer
/// being read didn't become empty yet.
/// - LZMA_STREAM_END: The buffer being read was finished.
-/// *unpadded_size and *uncompressed_size were set.
+/// *unpadded_size and *uncompressed_size were set if they
+/// were not NULL.
///
-/// \note This reads lzma_outbuf.finished variables and thus call
-/// to this function needs to be protected with a mutex.
+/// \note This reads lzma_outbuf.finished and .pos variables and thus
+/// calls to this function need to be protected with a mutex.
///
extern lzma_ret lzma_outq_read(lzma_outq *restrict outq,
+ const lzma_allocator *restrict allocator,
uint8_t *restrict out, size_t *restrict out_pos,
size_t out_size, lzma_vli *restrict unpadded_size,
lzma_vli *restrict uncompressed_size);
+/// \brief Enable partial output from a worker thread
+///
+/// If the buffer at the head of the output queue isn't finished,
+/// this will call enable_partial_output on the worker associated with
+/// that output buffer.
+///
+/// \note This reads a lzma_outbuf.finished variable and thus
+/// calls to this function need to be protected with a mutex.
+///
+extern void lzma_outq_enable_partial_output(lzma_outq *outq,
+ void (*enable_partial_output)(void *worker));
+
+
/// \brief Test if there is at least one buffer free
///
/// This must be used before getting a new buffer with lzma_outq_get_buf().
@@ -144,7 +230,7 @@ extern lzma_ret lzma_outq_read(lzma_outq *restrict outq,
static inline bool
lzma_outq_has_buf(const lzma_outq *outq)
{
- return outq->bufs_used < outq->bufs_allocated;
+ return outq->bufs_in_use < outq->bufs_limit;
}
@@ -152,5 +238,17 @@ lzma_outq_has_buf(const lzma_outq *outq)
static inline bool
lzma_outq_is_empty(const lzma_outq *outq)
{
- return outq->bufs_used == 0;
+ return outq->bufs_in_use == 0;
+}
+
+
+/// \brief Get the amount of memory needed for a single lzma_outbuf
+///
+/// \note Caller must check that the argument is significantly less
+/// than SIZE_MAX to avoid an integer overflow!
+static inline uint64_t
+lzma_outq_outbuf_memusage(size_t buf_size)
+{
+ assert(buf_size <= SIZE_MAX - sizeof(lzma_outbuf));
+ return sizeof(lzma_outbuf) + buf_size;
}
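
With the flexible array member, each lzma_outbuf is a single allocation: the struct header plus the payload, which is exactly what lzma_outq_outbuf_memusage() returns. A small standalone sketch (not liblzma code) showing the same layout and the same overflow guard that lzma_outq_prealloc_buf() applies before allocating:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Simplified stand-in for lzma_outbuf: header fields + flexible array.
struct outbuf {
	struct outbuf *next;
	size_t allocated;
	uint8_t buf[];
};

int
main(void)
{
	const size_t payload = 1U << 20; // 1 MiB of output space

	// Same guard as lzma_outq_prealloc_buf(): reject sizes where
	// header + payload would overflow size_t.
	if (payload > SIZE_MAX - sizeof(struct outbuf))
		return EXIT_FAILURE;

	// One allocation covers both the header and buf[payload].
	struct outbuf *b = malloc(sizeof(struct outbuf) + payload);
	if (b == NULL)
		return EXIT_FAILURE;

	b->next = NULL;
	b->allocated = payload;
	printf("total allocation: %zu bytes\n",
			sizeof(struct outbuf) + b->allocated);
	free(b);
	return EXIT_SUCCESS;
}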
diff --git a/contrib/libs/lzma/liblzma/common/stream_decoder.c b/contrib/libs/lzma/liblzma/common/stream_decoder.c
index fdd8ff2f9a..dcf7c1499f 100644
--- a/contrib/libs/lzma/liblzma/common/stream_decoder.c
+++ b/contrib/libs/lzma/liblzma/common/stream_decoder.c
@@ -18,15 +18,14 @@ typedef struct {
enum {
SEQ_STREAM_HEADER,
SEQ_BLOCK_HEADER,
- SEQ_BLOCK,
+ SEQ_BLOCK_INIT,
+ SEQ_BLOCK_RUN,
SEQ_INDEX,
SEQ_STREAM_FOOTER,
SEQ_STREAM_PADDING,
} sequence;
- /// Block or Metadata decoder. This takes little memory and the same
- /// data structure can be used to decode every Block Header, so it's
- /// a good idea to have a separate lzma_next_coder structure for it.
+ /// Block decoder
lzma_next_coder block_decoder;
/// Block options decoded by the Block Header decoder and used by
@@ -63,9 +62,9 @@ typedef struct {
/// If true, we will decode concatenated Streams that possibly have
/// Stream Padding between or after them. LZMA_STREAM_END is returned
- /// once the application isn't giving us any new input, and we aren't
- /// in the middle of a Stream, and possible Stream Padding is a
- /// multiple of four bytes.
+ /// once the application isn't giving us any new input (LZMA_FINISH),
+ /// and we aren't in the middle of a Stream, and possible
+ /// Stream Padding is a multiple of four bytes.
bool concatenated;
/// When decoding concatenated Streams, this is true as long as we
@@ -187,6 +186,15 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator,
return LZMA_OK;
coder->pos = 0;
+ coder->sequence = SEQ_BLOCK_INIT;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_INIT: {
+ // Checking memusage and doing the initialization needs
+ // its own sequence point because we need to be able to
+ // retry if we return LZMA_MEMLIMIT_ERROR.
// Version 1 is needed to support the .ignore_check option.
coder->block_options.version = 1;
@@ -235,22 +243,20 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator,
// Free the allocated filter options since they are needed
// only to initialize the Block decoder.
- for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
- lzma_free(filters[i].options, allocator);
-
+ lzma_filters_free(filters, allocator);
coder->block_options.filters = NULL;
- // Check if memory usage calculation and Block enocoder
+ // Check if memory usage calculation and Block decoder
// initialization succeeded.
if (ret != LZMA_OK)
return ret;
- coder->sequence = SEQ_BLOCK;
+ coder->sequence = SEQ_BLOCK_RUN;
}
// Fall through
- case SEQ_BLOCK: {
+ case SEQ_BLOCK_RUN: {
const lzma_ret ret = coder->block_decoder.code(
coder->block_decoder.coder, allocator,
in, in_pos, in_size, out, out_pos, out_size,
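
Splitting SEQ_BLOCK_INIT out of SEQ_BLOCK_HEADER makes LZMA_MEMLIMIT_ERROR retryable: the decoder stays at the same sequence point, so the application can raise the limit and call lzma_code() again. A hedged sketch of that pattern on the application side; the stream setup, the in/out buffers, and "action" are assumed to exist elsewhere:

	lzma_ret ret = lzma_code(&strm, action);

	if (ret == LZMA_MEMLIMIT_ERROR) {
		// lzma_memusage() reports how much memory the decoder
		// would need with the current settings.
		const uint64_t needed = lzma_memusage(&strm);

		// Raise the limit and retry; thanks to the separate
		// SEQ_BLOCK_INIT step the decoder resumes where it stopped.
		if (needed != UINT64_MAX
				&& lzma_memlimit_set(&strm, needed) == LZMA_OK)
			ret = lzma_code(&strm, action);
	}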
diff --git a/contrib/libs/lzma/liblzma/common/stream_decoder_mt.c b/contrib/libs/lzma/liblzma/common/stream_decoder_mt.c
new file mode 100644
index 0000000000..5733c76489
--- /dev/null
+++ b/contrib/libs/lzma/liblzma/common/stream_decoder_mt.c
@@ -0,0 +1,2016 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_decoder_mt.c
+/// \brief Multithreaded .xz Stream decoder
+//
+// Authors: Sebastian Andrzej Siewior
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "block_decoder.h"
+#include "stream_decoder.h"
+#include "index.h"
+#include "outqueue.h"
+
+
+typedef enum {
+ /// Waiting for work.
+ /// Main thread may change this to THR_RUN or THR_EXIT.
+ THR_IDLE,
+
+ /// Decoding is in progress.
+ /// Main thread may change this to THR_STOP or THR_EXIT.
+ /// The worker thread may change this to THR_IDLE.
+ THR_RUN,
+
+ /// The main thread wants the thread to stop whatever it was doing
+ /// but not exit. Main thread may change this to THR_EXIT.
+ /// The worker thread may change this to THR_IDLE.
+ THR_STOP,
+
+ /// The main thread wants the thread to exit.
+ THR_EXIT,
+
+} worker_state;
+
+
+typedef enum {
+ /// Partial updates (storing of worker thread progress
+ /// to lzma_outbuf) are disabled.
+ PARTIAL_DISABLED,
+
+ /// Main thread requests partial updates to be enabled but
+ /// no partial update has been done by the worker thread yet.
+ ///
+ /// Changing from PARTIAL_DISABLED to PARTIAL_START requires
+ /// use of the worker-thread mutex. Other transitions don't
+ /// need a mutex.
+ PARTIAL_START,
+
+ /// Partial updates are enabled and the worker thread has done
+ /// at least one partial update.
+ PARTIAL_ENABLED,
+
+} partial_update_mode;
+
+
+struct worker_thread {
+ /// Worker state is protected with our mutex.
+ worker_state state;
+
+ /// Input buffer that will contain the whole Block except Block Header.
+ uint8_t *in;
+
+ /// Amount of memory allocated for "in"
+ size_t in_size;
+
+ /// Number of bytes written to "in" by the main thread
+ size_t in_filled;
+
+ /// Number of bytes consumed from "in" by the worker thread.
+ size_t in_pos;
+
+ /// Amount of uncompressed data that has been decoded. This local
+ /// copy is needed because updating outbuf->pos requires locking
+ /// the main mutex (coder->mutex).
+ size_t out_pos;
+
+ /// Pointer to the main structure is needed to (1) lock the main
+ /// mutex (coder->mutex) when updating outbuf->pos and (2) when
+ /// putting this thread back to the stack of free threads.
+ struct lzma_stream_coder *coder;
+
+ /// The allocator is set by the main thread. Since a copy of the
+ /// pointer is kept here, the application must not change the
+ /// allocator before calling lzma_end().
+ const lzma_allocator *allocator;
+
+ /// Output queue buffer to which the uncompressed data is written.
+ lzma_outbuf *outbuf;
+
+ /// Amount of compressed data that has already been decompressed.
+ /// This is updated from in_pos when our mutex is locked.
+ /// This is size_t, not uint64_t, because per-thread progress
+ /// is limited to sizes of allocated buffers.
+ size_t progress_in;
+
+ /// Like progress_in but for uncompressed data.
+ size_t progress_out;
+
+ /// Updating outbuf->pos requires locking the main mutex
+ /// (coder->mutex). Since the main thread will only read output
+ /// from the oldest outbuf in the queue, only the worker thread
+ /// that is associated with the oldest outbuf needs to update its
+ /// outbuf->pos. This avoids useless mutex contention that would
+ /// happen if all worker threads were frequently locking the main
+ /// mutex to update their outbuf->pos.
+ ///
+ /// Only when partial_update is something else than PARTIAL_DISABLED,
+ /// this worker thread will update outbuf->pos after each call to
+ /// the Block decoder.
+ partial_update_mode partial_update;
+
+ /// Block decoder
+ lzma_next_coder block_decoder;
+
+ /// Thread-specific Block options are needed because the Block
+ /// decoder modifies the struct given to it at initialization.
+ lzma_block block_options;
+
+ /// Filter chain memory usage
+ uint64_t mem_filters;
+
+ /// Next structure in the stack of free worker threads.
+ struct worker_thread *next;
+
+ mythread_mutex mutex;
+ mythread_cond cond;
+
+ /// The ID of this thread is used to join the thread
+ /// when it's not needed anymore.
+ mythread thread_id;
+};
+
+
+struct lzma_stream_coder {
+ enum {
+ SEQ_STREAM_HEADER,
+ SEQ_BLOCK_HEADER,
+ SEQ_BLOCK_INIT,
+ SEQ_BLOCK_THR_INIT,
+ SEQ_BLOCK_THR_RUN,
+ SEQ_BLOCK_DIRECT_INIT,
+ SEQ_BLOCK_DIRECT_RUN,
+ SEQ_INDEX_WAIT_OUTPUT,
+ SEQ_INDEX_DECODE,
+ SEQ_STREAM_FOOTER,
+ SEQ_STREAM_PADDING,
+ SEQ_ERROR,
+ } sequence;
+
+ /// Block decoder
+ lzma_next_coder block_decoder;
+
+ /// Every Block Header will be decoded into this structure.
+ /// This is also used to initialize a Block decoder when in
+ /// direct mode. In threaded mode, a thread-specific copy will
+ /// be made for decoder initialization because the Block decoder
+ /// will modify the structure given to it.
+ lzma_block block_options;
+
+ /// Buffer to hold a filter chain for Block Header decoding and
+ /// initialization. These are freed after successful Block decoder
+ /// initialization or at stream_decoder_mt_end(). The thread-specific
+ /// copy of block_options won't hold a pointer to filters[] after
+ /// initialization.
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+ /// Stream Flags from Stream Header
+ lzma_stream_flags stream_flags;
+
+ /// Index is hashed so that it can be compared to the sizes of Blocks
+ /// with O(1) memory usage.
+ lzma_index_hash *index_hash;
+
+
+ /// Maximum wait time if cannot use all the input and cannot
+ /// fill the output buffer. This is in milliseconds.
+ uint32_t timeout;
+
+
+ /// Error code from a worker thread.
+ ///
+ /// \note Use mutex.
+ lzma_ret thread_error;
+
+ /// Error code to return after pending output has been copied out. If
+ /// set in read_output_and_wait(), this is a mirror of thread_error.
+	/// If set in stream_decode_mt() then it's, for example, an error
+	/// that occurred when decoding the Block Header.
+ lzma_ret pending_error;
+
+ /// Number of threads that will be created at maximum.
+ uint32_t threads_max;
+
+ /// Number of thread structures that have been initialized from
+ /// "threads", and thus the number of worker threads actually
+ /// created so far.
+ uint32_t threads_initialized;
+
+ /// Array of allocated thread-specific structures. When no threads
+ /// are in use (direct mode) this is NULL. In threaded mode this
+ /// points to an array of threads_max number of worker_thread structs.
+ struct worker_thread *threads;
+
+ /// Stack of free threads. When a thread finishes, it puts itself
+ /// back into this stack. This starts as empty because threads
+ /// are created only when actually needed.
+ ///
+ /// \note Use mutex.
+ struct worker_thread *threads_free;
+
+ /// The most recent worker thread to which the main thread writes
+ /// the new input from the application.
+ struct worker_thread *thr;
+
+ /// Output buffer queue for decompressed data from the worker threads
+ ///
+ /// \note Use mutex with operations that need it.
+ lzma_outq outq;
+
+ mythread_mutex mutex;
+ mythread_cond cond;
+
+
+ /// Memory usage that will not be exceeded in multi-threaded mode.
+ /// Single-threaded mode can exceed this even by a large amount.
+ uint64_t memlimit_threading;
+
+ /// Memory usage limit that should never be exceeded.
+ /// LZMA_MEMLIMIT_ERROR will be returned if decoding isn't possible
+ /// even in single-threaded mode without exceeding this limit.
+ uint64_t memlimit_stop;
+
+ /// Amount of memory in use by the direct mode decoder
+ /// (coder->block_decoder). In threaded mode this is 0.
+ uint64_t mem_direct_mode;
+
+ /// Amount of memory needed by the running worker threads.
+ /// This doesn't include the memory needed by the output buffer.
+ ///
+ /// \note Use mutex.
+ uint64_t mem_in_use;
+
+ /// Amount of memory used by the idle (cached) threads.
+ ///
+ /// \note Use mutex.
+ uint64_t mem_cached;
+
+
+ /// Amount of memory needed for the filter chain of the next Block.
+ uint64_t mem_next_filters;
+
+ /// Amount of memory needed for the thread-specific input buffer
+ /// for the next Block.
+ uint64_t mem_next_in;
+
+ /// Amount of memory actually needed to decode the next Block
+ /// in threaded mode. This is
+ /// mem_next_filters + mem_next_in + memory needed for lzma_outbuf.
+ uint64_t mem_next_block;
+
+
+ /// Amount of compressed data in Stream Header + Blocks that have
+ /// already been finished.
+ ///
+ /// \note Use mutex.
+ uint64_t progress_in;
+
+ /// Amount of uncompressed data in Blocks that have already
+ /// been finished.
+ ///
+ /// \note Use mutex.
+ uint64_t progress_out;
+
+
+ /// If true, LZMA_NO_CHECK is returned if the Stream has
+ /// no integrity check.
+ bool tell_no_check;
+
+ /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
+ /// an integrity check that isn't supported by this liblzma build.
+ bool tell_unsupported_check;
+
+ /// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
+ bool tell_any_check;
+
+ /// If true, we will tell the Block decoder to skip calculating
+ /// and verifying the integrity check.
+ bool ignore_check;
+
+ /// If true, we will decode concatenated Streams that possibly have
+ /// Stream Padding between or after them. LZMA_STREAM_END is returned
+ /// once the application isn't giving us any new input (LZMA_FINISH),
+ /// and we aren't in the middle of a Stream, and possible
+ /// Stream Padding is a multiple of four bytes.
+ bool concatenated;
+
+ /// If true, we will return any errors immediately instead of first
+ /// producing all output before the location of the error.
+ bool fail_fast;
+
+
+ /// When decoding concatenated Streams, this is true as long as we
+ /// are decoding the first Stream. This is needed to avoid misleading
+ /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
+ /// bytes.
+ bool first_stream;
+
+ /// This is used to track if the previous call to stream_decode_mt()
+ /// had output space (*out_pos < out_size) and managed to fill the
+ /// output buffer (*out_pos == out_size). This may be set to true
+ /// in read_output_and_wait(). This is read and then reset to false
+ /// at the beginning of stream_decode_mt().
+ ///
+ /// This is needed to support applications that call lzma_code() in
+ /// such a way that more input is provided only when lzma_code()
+ /// didn't fill the output buffer completely. Basically, this makes
+ /// it easier to convert such applications from single-threaded
+ /// decoder to multi-threaded decoder.
+ bool out_was_filled;
+
+ /// Write position in buffer[] and position in Stream Padding
+ size_t pos;
+
+ /// Buffer to hold Stream Header, Block Header, and Stream Footer.
+ /// Block Header has biggest maximum size.
+ uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
+};
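
The memlimit_threading, memlimit_stop, timeout, and fail_fast fields above are filled in from the application-supplied lzma_mt options. A hedged example of initializing the threaded decoder added in 5.4.0; the tuning values are purely illustrative:

#include <lzma.h>

static lzma_ret
init_threaded_decoder(lzma_stream *strm)
{
	lzma_mt mt = {
		.flags = LZMA_CONCATENATED | LZMA_FAIL_FAST,
		.threads = 4,
		.timeout = 300,                  // milliseconds
		.memlimit_threading = 256 << 20, // soft limit: fall back
						 // to single-threaded mode
		.memlimit_stop = UINT64_MAX,     // hard limit: never applied
	};

	return lzma_stream_decoder_mt(strm, &mt);
}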
+
+
+/// Enables updating of outbuf->pos. This is a callback function that is
+/// used with lzma_outq_enable_partial_output().
+static void
+worker_enable_partial_update(void *thr_ptr)
+{
+ struct worker_thread *thr = thr_ptr;
+
+ mythread_sync(thr->mutex) {
+ thr->partial_update = PARTIAL_START;
+ mythread_cond_signal(&thr->cond);
+ }
+}
+
+
+/// Things to do at THR_STOP or when finishing a Block.
+/// This is called with thr->mutex locked.
+static void
+worker_stop(struct worker_thread *thr)
+{
+ // Update memory usage counters.
+ thr->coder->mem_in_use -= thr->in_size;
+ thr->in_size = 0; // thr->in was freed above.
+
+ thr->coder->mem_in_use -= thr->mem_filters;
+ thr->coder->mem_cached += thr->mem_filters;
+
+ // Put this thread to the stack of free threads.
+ thr->next = thr->coder->threads_free;
+ thr->coder->threads_free = thr;
+
+ mythread_cond_signal(&thr->coder->cond);
+ return;
+}
+
+
+static MYTHREAD_RET_TYPE
+worker_decoder(void *thr_ptr)
+{
+ struct worker_thread *thr = thr_ptr;
+ size_t in_filled;
+ partial_update_mode partial_update;
+ lzma_ret ret;
+
+next_loop_lock:
+
+ mythread_mutex_lock(&thr->mutex);
+next_loop_unlocked:
+
+ if (thr->state == THR_IDLE) {
+ mythread_cond_wait(&thr->cond, &thr->mutex);
+ goto next_loop_unlocked;
+ }
+
+ if (thr->state == THR_EXIT) {
+ mythread_mutex_unlock(&thr->mutex);
+
+ lzma_free(thr->in, thr->allocator);
+ lzma_next_end(&thr->block_decoder, thr->allocator);
+
+ mythread_mutex_destroy(&thr->mutex);
+ mythread_cond_destroy(&thr->cond);
+
+ return MYTHREAD_RET_VALUE;
+ }
+
+ if (thr->state == THR_STOP) {
+ thr->state = THR_IDLE;
+ mythread_mutex_unlock(&thr->mutex);
+
+ mythread_sync(thr->coder->mutex) {
+ worker_stop(thr);
+ }
+
+ goto next_loop_lock;
+ }
+
+ assert(thr->state == THR_RUN);
+
+ // Update progress info for get_progress().
+ thr->progress_in = thr->in_pos;
+ thr->progress_out = thr->out_pos;
+
+ // If we don't have any new input, wait for a signal from the main
+ // thread except if partial output has just been enabled. In that
+ // case we will do one normal run so that the partial output info
+ // gets passed to the main thread. The call to block_decoder.code()
+ // is useless but harmless as it can occur only once per Block.
+ in_filled = thr->in_filled;
+ partial_update = thr->partial_update;
+
+ if (in_filled == thr->in_pos && partial_update != PARTIAL_START) {
+ mythread_cond_wait(&thr->cond, &thr->mutex);
+ goto next_loop_unlocked;
+ }
+
+ mythread_mutex_unlock(&thr->mutex);
+
+ // Pass the input in small chunks to the Block decoder.
+ // This way we react reasonably fast if we are told to stop/exit,
+ // and (when partial update is enabled) we tell about our progress
+ // to the main thread frequently enough.
+ const size_t chunk_size = 16384;
+ if ((in_filled - thr->in_pos) > chunk_size)
+ in_filled = thr->in_pos + chunk_size;
+
+ ret = thr->block_decoder.code(
+ thr->block_decoder.coder, thr->allocator,
+ thr->in, &thr->in_pos, in_filled,
+ thr->outbuf->buf, &thr->out_pos,
+ thr->outbuf->allocated, LZMA_RUN);
+
+ if (ret == LZMA_OK) {
+ if (partial_update != PARTIAL_DISABLED) {
+ // The main thread uses thr->mutex to change from
+ // PARTIAL_DISABLED to PARTIAL_START. The main thread
+ // doesn't care about this variable after that so we
+ // can safely change it here to PARTIAL_ENABLED
+ // without a mutex.
+ thr->partial_update = PARTIAL_ENABLED;
+
+ // The main thread is reading decompressed data
+ // from thr->outbuf. Tell the main thread about
+ // our progress.
+ //
+ // NOTE: It's possible that we consumed input without
+ // producing any new output so it's possible that
+ // only in_pos has changed. In case of PARTIAL_START
+ // it is possible that neither in_pos nor out_pos has
+ // changed.
+ mythread_sync(thr->coder->mutex) {
+ thr->outbuf->pos = thr->out_pos;
+ thr->outbuf->decoder_in_pos = thr->in_pos;
+ mythread_cond_signal(&thr->coder->cond);
+ }
+ }
+
+ goto next_loop_lock;
+ }
+
+ // Either we finished successfully (LZMA_STREAM_END) or an error
+ // occurred. Both cases are handled almost identically. The error
+ // case requires updating thr->coder->thread_error.
+ //
+ // The sizes are in the Block Header and the Block decoder
+ // checks that they match, thus we know these:
+ assert(ret != LZMA_STREAM_END || thr->in_pos == thr->in_size);
+ assert(ret != LZMA_STREAM_END
+ || thr->out_pos == thr->block_options.uncompressed_size);
+
+ // Free the input buffer. Don't update in_size as we need
+ // it later to update thr->coder->mem_in_use.
+ lzma_free(thr->in, thr->allocator);
+ thr->in = NULL;
+
+ mythread_sync(thr->mutex) {
+ if (thr->state != THR_EXIT)
+ thr->state = THR_IDLE;
+ }
+
+ mythread_sync(thr->coder->mutex) {
+ // Move our progress info to the main thread.
+ thr->coder->progress_in += thr->in_pos;
+ thr->coder->progress_out += thr->out_pos;
+ thr->progress_in = 0;
+ thr->progress_out = 0;
+
+ // Mark the outbuf as finished.
+ thr->outbuf->pos = thr->out_pos;
+ thr->outbuf->decoder_in_pos = thr->in_pos;
+ thr->outbuf->finished = true;
+ thr->outbuf->finish_ret = ret;
+ thr->outbuf = NULL;
+
+ // If an error occurred, tell it to the main thread.
+ if (ret != LZMA_STREAM_END
+ && thr->coder->thread_error == LZMA_OK)
+ thr->coder->thread_error = ret;
+
+ worker_stop(thr);
+ }
+
+ goto next_loop_lock;
+}
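
The loop above sleeps on thr->cond whenever in_filled == in_pos, so the main thread's side of the hand-off is to copy new input into thr->in, publish the new in_filled under thr->mutex, and signal the condition. The real code for this appears later in the file; the following is only a simplified sketch of that contract, assuming the declarations from this file:

static void
feed_worker_sketch(struct worker_thread *thr,
		const uint8_t *in, size_t *in_pos, size_t in_size)
{
	// Copy new input into the worker's buffer. Only the main thread
	// writes in_filled, so the copy itself needs no lock.
	size_t filled = thr->in_filled;
	lzma_bufcpy(in, in_pos, in_size, thr->in, &filled, thr->in_size);

	// Publish the new fill level and wake the worker in case it is
	// sleeping on thr->cond waiting for more input.
	mythread_sync(thr->mutex) {
		thr->in_filled = filled;
		mythread_cond_signal(&thr->cond);
	}
}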
+
+
+/// Tells the worker threads to exit and waits for them to terminate.
+static void
+threads_end(struct lzma_stream_coder *coder, const lzma_allocator *allocator)
+{
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ coder->threads[i].state = THR_EXIT;
+ mythread_cond_signal(&coder->threads[i].cond);
+ }
+ }
+
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i)
+ mythread_join(coder->threads[i].thread_id);
+
+ lzma_free(coder->threads, allocator);
+ coder->threads_initialized = 0;
+ coder->threads = NULL;
+ coder->threads_free = NULL;
+
+ // The threads don't update these when they exit. Do it here.
+ coder->mem_in_use = 0;
+ coder->mem_cached = 0;
+
+ return;
+}
+
+
+static void
+threads_stop(struct lzma_stream_coder *coder)
+{
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ // The state must be changed conditionally because
+ // THR_IDLE -> THR_STOP is not a valid state change.
+ if (coder->threads[i].state != THR_IDLE) {
+ coder->threads[i].state = THR_STOP;
+ mythread_cond_signal(&coder->threads[i].cond);
+ }
+ }
+ }
+
+ return;
+}
+
+
+/// Initialize a new worker_thread structure and create a new thread.
+static lzma_ret
+initialize_new_thread(struct lzma_stream_coder *coder,
+ const lzma_allocator *allocator)
+{
+ // Allocate the coder->threads array if needed. It's done here instead
+ // of when initializing the decoder because we don't need this if we
+ // use the direct mode (we may even free coder->threads in the middle
+ // of the file if we switch from threaded to direct mode).
+ if (coder->threads == NULL) {
+ coder->threads = lzma_alloc(
+ coder->threads_max * sizeof(struct worker_thread),
+ allocator);
+
+ if (coder->threads == NULL)
+ return LZMA_MEM_ERROR;
+ }
+
+ // Pick a free structure.
+ assert(coder->threads_initialized < coder->threads_max);
+ struct worker_thread *thr
+ = &coder->threads[coder->threads_initialized];
+
+ if (mythread_mutex_init(&thr->mutex))
+ goto error_mutex;
+
+ if (mythread_cond_init(&thr->cond))
+ goto error_cond;
+
+ thr->state = THR_IDLE;
+ thr->in = NULL;
+ thr->in_size = 0;
+ thr->allocator = allocator;
+ thr->coder = coder;
+ thr->outbuf = NULL;
+ thr->block_decoder = LZMA_NEXT_CODER_INIT;
+ thr->mem_filters = 0;
+
+ if (mythread_create(&thr->thread_id, worker_decoder, thr))
+ goto error_thread;
+
+ ++coder->threads_initialized;
+ coder->thr = thr;
+
+ return LZMA_OK;
+
+error_thread:
+ mythread_cond_destroy(&thr->cond);
+
+error_cond:
+ mythread_mutex_destroy(&thr->mutex);
+
+error_mutex:
+ return LZMA_MEM_ERROR;
+}
+
+
+static lzma_ret
+get_thread(struct lzma_stream_coder *coder, const lzma_allocator *allocator)
+{
+ // If there is a free structure on the stack, use it.
+ mythread_sync(coder->mutex) {
+ if (coder->threads_free != NULL) {
+ coder->thr = coder->threads_free;
+ coder->threads_free = coder->threads_free->next;
+
+			// The thread is no longer in the cache so subtract
+ // it from the cached memory usage. Don't add it
+ // to mem_in_use though; the caller will handle it
+ // since it knows how much memory it will actually
+ // use (the filter chain might change).
+ coder->mem_cached -= coder->thr->mem_filters;
+ }
+ }
+
+ if (coder->thr == NULL) {
+ assert(coder->threads_initialized < coder->threads_max);
+
+ // Initialize a new thread.
+ return_if_error(initialize_new_thread(coder, allocator));
+ }
+
+ coder->thr->in_filled = 0;
+ coder->thr->in_pos = 0;
+ coder->thr->out_pos = 0;
+
+ coder->thr->progress_in = 0;
+ coder->thr->progress_out = 0;
+
+ coder->thr->partial_update = PARTIAL_DISABLED;
+
+ return LZMA_OK;
+}
+
+
+static lzma_ret
+read_output_and_wait(struct lzma_stream_coder *coder,
+ const lzma_allocator *allocator,
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size,
+ bool *input_is_possible,
+ bool waiting_allowed,
+ mythread_condtime *wait_abs, bool *has_blocked)
+{
+ lzma_ret ret = LZMA_OK;
+
+ mythread_sync(coder->mutex) {
+ do {
+ // Get as much output from the queue as is possible
+ // without blocking.
+ const size_t out_start = *out_pos;
+ do {
+ ret = lzma_outq_read(&coder->outq, allocator,
+ out, out_pos, out_size,
+ NULL, NULL);
+
+ // If a Block was finished, tell the worker
+ // thread of the next Block (if it is still
+ // running) to start telling the main thread
+ // when new output is available.
+ if (ret == LZMA_STREAM_END)
+ lzma_outq_enable_partial_output(
+ &coder->outq,
+ &worker_enable_partial_update);
+
+				// Keep looping as long as Blocks keep
+				// getting finished.
+ // It's important to loop around even if
+ // *out_pos == out_size because there could
+ // be an empty Block that will return
+ // LZMA_STREAM_END without needing any
+ // output space.
+ } while (ret == LZMA_STREAM_END);
+
+ // Check if lzma_outq_read reported an error from
+ // the Block decoder.
+ if (ret != LZMA_OK)
+ break;
+
+ // If the output buffer is now full but it wasn't full
+ // when this function was called, set out_was_filled.
+ // This way the next call to stream_decode_mt() knows
+ // that some output was produced and no output space
+ // remained in the previous call to stream_decode_mt().
+ if (*out_pos == out_size && *out_pos != out_start)
+ coder->out_was_filled = true;
+
+ // Check if any thread has indicated an error.
+ if (coder->thread_error != LZMA_OK) {
+ // If LZMA_FAIL_FAST was used, report errors
+ // from worker threads immediately.
+ if (coder->fail_fast) {
+ ret = coder->thread_error;
+ break;
+ }
+
+ // Otherwise set pending_error. The value we
+ // set here will not actually get used other
+ // than working as a flag that an error has
+ // occurred. This is because in SEQ_ERROR
+ // all output before the error will be read
+ // first by calling this function, and once we
+ // reach the location of the (first) error the
+ // error code from the above lzma_outq_read()
+ // will be returned to the application.
+ //
+ // Use LZMA_PROG_ERROR since the value should
+ // never leak to the application. It's
+ // possible that pending_error has already
+ // been set but that doesn't matter: if we get
+ // here, pending_error only works as a flag.
+ coder->pending_error = LZMA_PROG_ERROR;
+ }
+
+ // Check if decoding of the next Block can be started.
+ // The memusage of the active threads must be low
+ // enough, there must be a free buffer slot in the
+ // output queue, and there must be a free thread
+ // (that can be either created or an existing one
+ // reused).
+ //
+ // NOTE: This is checked after reading the output
+ // above because reading the output can free a slot in
+ // the output queue and also reduce active memusage.
+ //
+ // NOTE: If output queue is empty, then input will
+ // always be possible.
+ if (input_is_possible != NULL
+ && coder->memlimit_threading
+ - coder->mem_in_use
+ - coder->outq.mem_in_use
+ >= coder->mem_next_block
+ && lzma_outq_has_buf(&coder->outq)
+ && (coder->threads_initialized
+ < coder->threads_max
+ || coder->threads_free
+ != NULL)) {
+ *input_is_possible = true;
+ break;
+ }
+
+ // If the caller doesn't want us to block, return now.
+ if (!waiting_allowed)
+ break;
+
+ // This check is needed only when input_is_possible
+ // is NULL. We must return if we aren't waiting for
+ // input to become possible and there is no more
+ // output coming from the queue.
+ if (lzma_outq_is_empty(&coder->outq)) {
+ assert(input_is_possible == NULL);
+ break;
+ }
+
+ // If there is more data available from the queue,
+ // our out buffer must be full and we need to return
+ // so that the application can provide more output
+ // space.
+ //
+ // NOTE: In general lzma_outq_is_readable() can return
+ // true also when there are no more bytes available.
+ // This can happen when a Block has finished without
+ // providing any new output. We know that this is not
+ // the case because in the beginning of this loop we
+ // tried to read as much as possible even when we had
+ // no output space left and the mutex has been locked
+ // all the time (so worker threads cannot have changed
+ // anything). Thus there must be actual pending output
+ // in the queue.
+ if (lzma_outq_is_readable(&coder->outq)) {
+ assert(*out_pos == out_size);
+ break;
+ }
+
+ // If the application stops providing more input
+ // in the middle of a Block, there will eventually
+ // be one worker thread left that is stuck waiting for
+ // more input (that might never arrive) and a matching
+ // outbuf which the worker thread cannot finish due
+ // to lack of input. We must detect this situation,
+ // otherwise we would end up waiting indefinitely
+ // (if no timeout is in use) or keep returning
+ // LZMA_TIMED_OUT while making no progress. Thus, the
+ // application would never get LZMA_BUF_ERROR from
+ // lzma_code() which would tell the application that
+ // no more progress is possible. No LZMA_BUF_ERROR
+ // means that, for example, truncated .xz files could
+ // cause an infinite loop.
+ //
+ // A worker thread doing partial updates will
+ // store not only the output position in outbuf->pos
+ // but also the matching input position in
+ // outbuf->decoder_in_pos. Here we check if that
+ // input position matches the amount of input that
+ // the worker thread has been given (in_filled).
+ // If so, we must return and not wait as no more
+ // output will be coming without first getting more
+ // input to the worker thread. If the application
+ // keeps calling lzma_code() without providing more
+ // input, it will eventually get LZMA_BUF_ERROR.
+ //
+ // NOTE: We can read partial_update and in_filled
+ // without thr->mutex as only the main thread
+ // modifies these variables. decoder_in_pos requires
+ // coder->mutex which we are already holding.
+ if (coder->thr != NULL && coder->thr->partial_update
+ != PARTIAL_DISABLED) {
+ // There is exactly one outbuf in the queue.
+ assert(coder->thr->outbuf == coder->outq.head);
+ assert(coder->thr->outbuf == coder->outq.tail);
+
+ if (coder->thr->outbuf->decoder_in_pos
+ == coder->thr->in_filled)
+ break;
+ }
+
+ // Wait for input or output to become possible.
+ if (coder->timeout != 0) {
+ // See the comment in stream_encoder_mt.c
+ // about why mythread_condtime_set() is used
+ // like this.
+ //
+ // FIXME?
+ // In contrast to the encoder, this calls
+ // _condtime_set while the mutex is locked.
+ if (!*has_blocked) {
+ *has_blocked = true;
+ mythread_condtime_set(wait_abs,
+ &coder->cond,
+ coder->timeout);
+ }
+
+ if (mythread_cond_timedwait(&coder->cond,
+ &coder->mutex,
+ wait_abs) != 0) {
+ ret = LZMA_TIMED_OUT;
+ break;
+ }
+ } else {
+ mythread_cond_wait(&coder->cond,
+ &coder->mutex);
+ }
+ } while (ret == LZMA_OK);
+ }
+
+ // If we are returning an error, then the application cannot get
+ // more output from us and thus keeping the threads running is
+	// useless and a waste of CPU time.
+ if (ret != LZMA_OK && ret != LZMA_TIMED_OUT)
+ threads_stop(coder);
+
+ return ret;
+}
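
The input_is_possible check above packs three requirements into one expression: enough headroom under memlimit_threading, a free output queue slot, and a thread that can be reused or created. Rewritten as a hypothetical helper purely for readability (coder->mutex must be held, exactly as in the check above; this helper does not exist in the patch):

static bool
block_can_start_sketch(const struct lzma_stream_coder *coder)
{
	// Memory still available for a new Block under the soft limit.
	const uint64_t mem_avail = coder->memlimit_threading
			- coder->mem_in_use - coder->outq.mem_in_use;

	return mem_avail >= coder->mem_next_block
			&& lzma_outq_has_buf(&coder->outq)
			&& (coder->threads_initialized < coder->threads_max
				|| coder->threads_free != NULL);
}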
+
+
+static lzma_ret
+decode_block_header(struct lzma_stream_coder *coder,
+ const lzma_allocator *allocator, const uint8_t *restrict in,
+ size_t *restrict in_pos, size_t in_size)
+{
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ if (coder->pos == 0) {
+ // Detect if it's Index.
+ if (in[*in_pos] == 0x00)
+ return LZMA_INDEX_DETECTED;
+
+ // Calculate the size of the Block Header. Note that
+ // Block Header decoder wants to see this byte too
+ // so don't advance *in_pos.
+ coder->block_options.header_size
+ = lzma_block_header_size_decode(
+ in[*in_pos]);
+ }
+
+ // Copy the Block Header to the internal buffer.
+ lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+ coder->block_options.header_size);
+
+ // Return if we didn't get the whole Block Header yet.
+ if (coder->pos < coder->block_options.header_size)
+ return LZMA_OK;
+
+ coder->pos = 0;
+
+ // Version 1 is needed to support the .ignore_check option.
+ coder->block_options.version = 1;
+
+ // Block Header decoder will initialize all members of this array
+ // so we don't need to do it here.
+ coder->block_options.filters = coder->filters;
+
+ // Decode the Block Header.
+ return_if_error(lzma_block_header_decode(&coder->block_options,
+ allocator, coder->buffer));
+
+ // If LZMA_IGNORE_CHECK was used, this flag needs to be set.
+ // It has to be set after lzma_block_header_decode() because
+ // it always resets this to false.
+ coder->block_options.ignore_check = coder->ignore_check;
+
+ // coder->block_options is ready now.
+ return LZMA_STREAM_END;
+}
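
decode_block_header() sizes its copy with the public lzma_block_header_size_decode() macro, which derives the whole header length from the first byte alone (a first byte of 0x00 is not a Block Header at all but the Index indicator, hence LZMA_INDEX_DETECTED). A tiny standalone illustration with an example value:

#include <inttypes.h>
#include <lzma.h>
#include <stdio.h>

int
main(void)
{
	// The first byte of a Block Header encodes (header_size / 4) - 1.
	const uint8_t first_byte = 0x02; // example value

	const uint32_t header_size =
			lzma_block_header_size_decode(first_byte);

	// Prints 12 for this example value.
	printf("Block Header size: %" PRIu32 " bytes\n", header_size);
	return 0;
}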
+
+
+/// Get the size of the Compressed Data + Block Padding + Check.
+static size_t
+comp_blk_size(const struct lzma_stream_coder *coder)
+{
+ return vli_ceil4(coder->block_options.compressed_size)
+ + lzma_check_size(coder->stream_flags.check);
+}
+
+
+/// Returns true if the size (compressed or uncompressed) is such that
+/// threaded decompression cannot be used. Sizes that are too big compared
+/// to SIZE_MAX must be rejected to avoid integer overflows and truncations
+/// when lzma_vli is assigned to a size_t.
+static bool
+is_direct_mode_needed(lzma_vli size)
+{
+ return size == LZMA_VLI_UNKNOWN || size > SIZE_MAX / 3;
+}
+
+
+static lzma_ret
+stream_decoder_reset(struct lzma_stream_coder *coder,
+ const lzma_allocator *allocator)
+{
+ // Initialize the Index hash used to verify the Index.
+ coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
+ if (coder->index_hash == NULL)
+ return LZMA_MEM_ERROR;
+
+ // Reset the rest of the variables.
+ coder->sequence = SEQ_STREAM_HEADER;
+ coder->pos = 0;
+
+ return LZMA_OK;
+}
+
+
+static lzma_ret
+stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size,
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size, lzma_action action)
+{
+ struct lzma_stream_coder *coder = coder_ptr;
+
+ mythread_condtime wait_abs;
+ bool has_blocked = false;
+
+ // Determine if in SEQ_BLOCK_HEADER and SEQ_BLOCK_THR_RUN we should
+ // tell read_output_and_wait() to wait until it can fill the output
+ // buffer (or a timeout occurs). Two conditions must be met:
+ //
+ // (1) If the caller provided no new input. The reason for this
+ // can be, for example, the end of the file or that there is
+ // a pause in the input stream and more input is available
+ // a little later. In this situation we should wait for output
+ // because otherwise we would end up in a busy-waiting loop where
+ // we make no progress and the application just calls us again
+ // without providing any new input. This would then result in
+ // LZMA_BUF_ERROR even though more output would be available
+ // once the worker threads decode more data.
+ //
+ // (2) Even if (1) is true, we will not wait if the previous call to
+ // this function managed to produce some output and the output
+ // buffer became full. This is for compatibility with applications
+ // that call lzma_code() in such a way that new input is provided
+ // only when the output buffer didn't become full. Without this
+ // trick such applications would have bad performance (bad
+ // parallelization due to decoder not getting input fast enough).
+ //
+ // NOTE: Such loops might require that timeout is disabled (0)
+ // if they assume that output-not-full implies that all input has
+ // been consumed. If and only if timeout is enabled, we may return
+ // when output isn't full *and* not all input has been consumed.
+ //
+ // However, if LZMA_FINISH is used, the above is ignored and we always
+ // wait (timeout can still cause us to return) because we know that
+ // we won't get any more input. This matters if the input file is
+ // truncated and we are doing single-shot decoding, that is,
+ // timeout = 0 and LZMA_FINISH is used on the first call to
+ // lzma_code() and the output buffer is known to be big enough
+ // to hold all uncompressed data:
+ //
+ // - If LZMA_FINISH wasn't handled specially, we could return
+ // LZMA_OK before providing all output that is possible with the
+ // truncated input. The rest would be available if lzma_code() was
+ // called again but then it's not single-shot decoding anymore.
+ //
+ // - By handling LZMA_FINISH specially here, the first call will
+ // produce all the output, matching the behavior of the
+ // single-threaded decoder.
+ //
+ // So it's a very specific corner case but also easy to avoid. Note
+ // that this special handling of LZMA_FINISH has no effect for
+ // single-shot decoding when the input file is valid (not truncated);
+ // premature LZMA_OK wouldn't be possible as long as timeout = 0.
+ const bool waiting_allowed = action == LZMA_FINISH
+ || (*in_pos == in_size && !coder->out_was_filled);
+ coder->out_was_filled = false;
+
+ while (true)
+ switch (coder->sequence) {
+ case SEQ_STREAM_HEADER: {
+ // Copy the Stream Header to the internal buffer.
+ const size_t in_old = *in_pos;
+ lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+ LZMA_STREAM_HEADER_SIZE);
+ coder->progress_in += *in_pos - in_old;
+
+ // Return if we didn't get the whole Stream Header yet.
+ if (coder->pos < LZMA_STREAM_HEADER_SIZE)
+ return LZMA_OK;
+
+ coder->pos = 0;
+
+ // Decode the Stream Header.
+ const lzma_ret ret = lzma_stream_header_decode(
+ &coder->stream_flags, coder->buffer);
+ if (ret != LZMA_OK)
+ return ret == LZMA_FORMAT_ERROR && !coder->first_stream
+ ? LZMA_DATA_ERROR : ret;
+
+ // If we are decoding concatenated Streams, and the later
+ // Streams have invalid Header Magic Bytes, we give
+ // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
+ coder->first_stream = false;
+
+ // Copy the type of the Check so that Block Header and Block
+ // decoders see it.
+ coder->block_options.check = coder->stream_flags.check;
+
+ // Even if we return LZMA_*_CHECK below, we want
+ // to continue from Block Header decoding.
+ coder->sequence = SEQ_BLOCK_HEADER;
+
+ // Detect if there's no integrity check or if it is
+ // unsupported if those were requested by the application.
+ if (coder->tell_no_check && coder->stream_flags.check
+ == LZMA_CHECK_NONE)
+ return LZMA_NO_CHECK;
+
+ if (coder->tell_unsupported_check
+ && !lzma_check_is_supported(
+ coder->stream_flags.check))
+ return LZMA_UNSUPPORTED_CHECK;
+
+ if (coder->tell_any_check)
+ return LZMA_GET_CHECK;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_HEADER: {
+ const size_t in_old = *in_pos;
+ const lzma_ret ret = decode_block_header(coder, allocator,
+ in, in_pos, in_size);
+ coder->progress_in += *in_pos - in_old;
+
+ if (ret == LZMA_OK) {
+ // We didn't decode the whole Block Header yet.
+ //
+ // Read output from the queue before returning. This
+ // is important because it is possible that the
+ // application doesn't have any new input available
+ // immediately. If we didn't try to copy output from
+ // the output queue here, lzma_code() could end up
+ // returning LZMA_BUF_ERROR even though queued output
+ // is available.
+ //
+ // If the lzma_code() call provided at least one input
+ // byte, only copy as much data from the output queue
+ // as is available immediately. This way the
+ // application will be able to provide more input
+ // without a delay.
+ //
+ // On the other hand, if lzma_code() was called with
+ // an empty input buffer(*), treat it specially: try
+ // to fill the output buffer even if it requires
+ // waiting for the worker threads to provide output
+ // (timeout, if specified, can still cause us to
+ // return).
+ //
+ // - This way the application will be able to get all
+ // data that can be decoded from the input provided
+ // so far.
+ //
+ // - We avoid both premature LZMA_BUF_ERROR and
+ // busy-waiting where the application repeatedly
+ // calls lzma_code() which immediately returns
+ // LZMA_OK without providing new data.
+ //
+			// - If the queue becomes empty, we won't wait for
+			// anything and will return LZMA_OK immediately
+ // (coder->timeout is completely ignored).
+ //
+ // (*) See the comment at the beginning of this
+			// function for how waiting_allowed is determined
+ // and why there is an exception to the rule
+ // of "called with an empty input buffer".
+ assert(*in_pos == in_size);
+
+ // If LZMA_FINISH was used we know that we won't get
+ // more input, so the file must be truncated if we
+ // get here. If worker threads don't detect any
+ // errors, eventually there will be no more output
+ // while we keep returning LZMA_OK which gets
+ // converted to LZMA_BUF_ERROR in lzma_code().
+ //
+ // If fail-fast is enabled then we will return
+ // immediately using LZMA_DATA_ERROR instead of
+ // LZMA_OK or LZMA_BUF_ERROR. Rationale for the
+ // error code:
+ //
+ // - Worker threads may have a large amount of
+ // not-yet-decoded input data and we don't
+ // know for sure if all data is valid. Bad
+ // data there would result in LZMA_DATA_ERROR
+ // when fail-fast isn't used.
+ //
+ // - Immediate LZMA_BUF_ERROR would be a bit weird
+ // considering the older liblzma code. lzma_code()
+ // even has an assertion to prevent coders from
+ // returning LZMA_BUF_ERROR directly.
+ //
+			// The downside of this is that with fail-fast, apps
+ // cannot always distinguish between corrupt and
+ // truncated files.
+ if (action == LZMA_FINISH && coder->fail_fast) {
+ // We won't produce any more output. Stop
+ // the unfinished worker threads so they
+ // won't waste CPU time.
+ threads_stop(coder);
+ return LZMA_DATA_ERROR;
+ }
+
+ // read_output_and_wait() will call threads_stop()
+ // if needed so with that we can use return_if_error.
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, waiting_allowed,
+ &wait_abs, &has_blocked));
+
+ if (coder->pending_error != LZMA_OK) {
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ return LZMA_OK;
+ }
+
+ if (ret == LZMA_INDEX_DETECTED) {
+ coder->sequence = SEQ_INDEX_WAIT_OUTPUT;
+ break;
+ }
+
+ // See if an error occurred.
+ if (ret != LZMA_STREAM_END) {
+ // NOTE: Here and in all other places where
+ // pending_error is set, it may overwrite the value
+ // (LZMA_PROG_ERROR) set by read_output_and_wait().
+ // That function might overwrite value set here too.
+ // These are fine because when read_output_and_wait()
+ // sets pending_error, it actually works as a flag
+ // variable only ("some error has occurred") and the
+ // actual value of pending_error is not used in
+ // SEQ_ERROR. In such cases SEQ_ERROR will eventually
+ // get the correct error code from the return value of
+ // a later read_output_and_wait() call.
+ coder->pending_error = ret;
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ // Calculate the memory usage of the filters / Block decoder.
+ coder->mem_next_filters = lzma_raw_decoder_memusage(
+ coder->filters);
+
+ if (coder->mem_next_filters == UINT64_MAX) {
+ // One or more unknown Filter IDs.
+ coder->pending_error = LZMA_OPTIONS_ERROR;
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ coder->sequence = SEQ_BLOCK_INIT;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_INIT: {
+ // Check if decoding is possible at all with the current
+ // memlimit_stop which we must never exceed.
+ //
+ // This needs to be the first thing in SEQ_BLOCK_INIT
+ // to make it possible to restart decoding after increasing
+ // memlimit_stop with lzma_memlimit_set().
+ if (coder->mem_next_filters > coder->memlimit_stop) {
+ // Flush pending output before returning
+ // LZMA_MEMLIMIT_ERROR. If the application doesn't
+ // want to increase the limit, at least it will get
+ // all the output possible so far.
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, true, &wait_abs, &has_blocked));
+
+ if (!lzma_outq_is_empty(&coder->outq))
+ return LZMA_OK;
+
+ return LZMA_MEMLIMIT_ERROR;
+ }
+
+ // Check if the size information is available in Block Header.
+ // If it is, check if the sizes are small enough that we don't
+ // need to worry *too* much about integer overflows later in
+ // the code. If these conditions are not met, we must use the
+ // single-threaded direct mode.
+ if (is_direct_mode_needed(coder->block_options.compressed_size)
+ || is_direct_mode_needed(
+ coder->block_options.uncompressed_size)) {
+ coder->sequence = SEQ_BLOCK_DIRECT_INIT;
+ break;
+ }
+
+ // Calculate the amount of memory needed for the input and
+ // output buffers in threaded mode.
+ //
+ // These cannot overflow because we already checked that
+ // the sizes are small enough using is_direct_mode_needed().
+ coder->mem_next_in = comp_blk_size(coder);
+ const uint64_t mem_buffers = coder->mem_next_in
+ + lzma_outq_outbuf_memusage(
+ coder->block_options.uncompressed_size);
+
+ // Add the amount needed by the filters.
+ // Avoid integer overflows.
+ if (UINT64_MAX - mem_buffers < coder->mem_next_filters) {
+ // Use direct mode if the memusage would overflow.
+ // This is a theoretical case that shouldn't happen
+ // in practice unless the input file is weird (broken
+ // or malicious).
+ coder->sequence = SEQ_BLOCK_DIRECT_INIT;
+ break;
+ }
+
+ // Amount of memory needed to decode this Block in
+ // threaded mode:
+ coder->mem_next_block = coder->mem_next_filters + mem_buffers;
+
+ // If this alone would exceed memlimit_threading, then we must
+ // use the single-threaded direct mode.
+ if (coder->mem_next_block > coder->memlimit_threading) {
+ coder->sequence = SEQ_BLOCK_DIRECT_INIT;
+ break;
+ }
+
+ // Use the threaded mode. Free the direct mode decoder in
+ // case it has been initialized.
+ lzma_next_end(&coder->block_decoder, allocator);
+ coder->mem_direct_mode = 0;
+
+ // Since we already know what the sizes are supposed to be,
+ // we can already add them to the Index hash. The Block
+ // decoder will verify the values while decoding.
+ const lzma_ret ret = lzma_index_hash_append(coder->index_hash,
+ lzma_block_unpadded_size(
+ &coder->block_options),
+ coder->block_options.uncompressed_size);
+ if (ret != LZMA_OK) {
+ coder->pending_error = ret;
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ coder->sequence = SEQ_BLOCK_THR_INIT;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_THR_INIT: {
+		// We need to wait for multiple conditions to become true
+		// before we can initialize the Block decoder and let a worker
+ // thread decode it:
+ //
+ // - Wait for the memory usage of the active threads to drop
+ // so that starting the decoding of this Block won't make
+ // us go over memlimit_threading.
+ //
+ // - Wait for at least one free output queue slot.
+ //
+ // - Wait for a free worker thread.
+ //
+ // While we wait, we must copy decompressed data to the out
+ // buffer and catch possible decoder errors.
+ //
+ // read_output_and_wait() does all the above.
+ bool block_can_start = false;
+
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ &block_can_start, true,
+ &wait_abs, &has_blocked));
+
+ if (coder->pending_error != LZMA_OK) {
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ if (!block_can_start) {
+ // It's not a timeout because return_if_error handles
+ // it already. Output queue cannot be empty either
+ // because in that case block_can_start would have
+ // been true. Thus the output buffer must be full and
+ // the queue isn't empty.
+ assert(*out_pos == out_size);
+ assert(!lzma_outq_is_empty(&coder->outq));
+ return LZMA_OK;
+ }
+
+ // We know that we can start decoding this Block without
+ // exceeding memlimit_threading. However, to stay below
+ // memlimit_threading may require freeing some of the
+ // cached memory.
+ //
+ // Get a local copy of variables that require locking the
+ // mutex. It is fine if the worker threads modify the real
+ // values after we read these as those changes can only be
+ // towards more favorable conditions (less memory in use,
+ // more in cache).
+ uint64_t mem_in_use;
+ uint64_t mem_cached;
+ struct worker_thread *thr = NULL; // Init to silence warning.
+
+ mythread_sync(coder->mutex) {
+ mem_in_use = coder->mem_in_use;
+ mem_cached = coder->mem_cached;
+ thr = coder->threads_free;
+ }
+
+ // The maximum amount of memory that can be held by other
+ // threads and cached buffers while allowing us to start
+ // decoding the next Block.
+ const uint64_t mem_max = coder->memlimit_threading
+ - coder->mem_next_block;
+
+ // If the existing allocations are so large that starting
+ // to decode this Block might exceed memlimit_threading,
+ // try to free memory from the output queue cache first.
+ //
+ // NOTE: This math assumes the worst case. It's possible
+ // that the limit wouldn't be exceeded if the existing cached
+ // allocations are reused.
+ if (mem_in_use + mem_cached + coder->outq.mem_allocated
+ > mem_max) {
+ // Clear the outq cache except leave one buffer in
+ // the cache if its size is correct. That way we
+ // don't free and almost immediately reallocate
+ // an identical buffer.
+ lzma_outq_clear_cache2(&coder->outq, allocator,
+ coder->block_options.uncompressed_size);
+ }
+
+ // If there is at least one worker_thread in the cache and
+ // the existing allocations are so large that starting to
+ // decode this Block might exceed memlimit_threading, free
+ // memory by freeing cached Block decoders.
+ //
+ // NOTE: The comparison is different here than above.
+ // Here we don't care about cached buffers in outq anymore
+ // and only look at memory actually in use. This is because
+ // if there is something in outq cache, it's a single buffer
+ // that can be used as is. We ensured this in the above
+ // if-block.
+ uint64_t mem_freed = 0;
+ if (thr != NULL && mem_in_use + mem_cached
+ + coder->outq.mem_in_use > mem_max) {
+ // Don't free the first Block decoder if its memory
+ // usage isn't greater than what this Block will need.
+ // Typically the same filter chain is used for all
+ // Blocks so this way the allocations can be reused
+ // when get_thread() picks the first worker_thread
+ // from the cache.
+ if (thr->mem_filters <= coder->mem_next_filters)
+ thr = thr->next;
+
+ while (thr != NULL) {
+ lzma_next_end(&thr->block_decoder, allocator);
+ mem_freed += thr->mem_filters;
+ thr->mem_filters = 0;
+ thr = thr->next;
+ }
+ }
+
+ // Update the memory usage counters. Note that coder->mem_*
+ // may have changed since we read them so we must subtract
+ // or add the changes.
+ mythread_sync(coder->mutex) {
+ coder->mem_cached -= mem_freed;
+
+ // Memory needed for the filters and the input buffer.
+ // The output queue takes care of its own counter so
+ // we don't touch it here.
+ //
+ // NOTE: After this, coder->mem_in_use +
+ // coder->mem_cached might count the same thing twice.
+ // If so, this will get corrected in get_thread() when
+ // a worker_thread is picked from coder->free_threads
+ // and its memory usage is subtracted from mem_cached.
+ coder->mem_in_use += coder->mem_next_in
+ + coder->mem_next_filters;
+ }
+
+ // Allocate memory for the output buffer in the output queue.
+ lzma_ret ret = lzma_outq_prealloc_buf(
+ &coder->outq, allocator,
+ coder->block_options.uncompressed_size);
+ if (ret != LZMA_OK) {
+ threads_stop(coder);
+ return ret;
+ }
+
+ // Set up coder->thr.
+ ret = get_thread(coder, allocator);
+ if (ret != LZMA_OK) {
+ threads_stop(coder);
+ return ret;
+ }
+
+ // The new Block decoder memory usage is already counted in
+ // coder->mem_in_use. Store it in the thread too.
+ coder->thr->mem_filters = coder->mem_next_filters;
+
+ // Initialize the Block decoder.
+ coder->thr->block_options = coder->block_options;
+ ret = lzma_block_decoder_init(
+ &coder->thr->block_decoder, allocator,
+ &coder->thr->block_options);
+
+ // Free the allocated filter options since they are needed
+ // only to initialize the Block decoder.
+ lzma_filters_free(coder->filters, allocator);
+ coder->thr->block_options.filters = NULL;
+
+ // Check if the Block decoder initialization succeeded.
+ if (ret != LZMA_OK) {
+ coder->pending_error = ret;
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ // Allocate the input buffer.
+ coder->thr->in_size = coder->mem_next_in;
+ coder->thr->in = lzma_alloc(coder->thr->in_size, allocator);
+ if (coder->thr->in == NULL) {
+ threads_stop(coder);
+ return LZMA_MEM_ERROR;
+ }
+
+ // Get the preallocated output buffer.
+ coder->thr->outbuf = lzma_outq_get_buf(
+ &coder->outq, coder->thr);
+
+ // Start the decoder.
+ mythread_sync(coder->thr->mutex) {
+ assert(coder->thr->state == THR_IDLE);
+ coder->thr->state = THR_RUN;
+ mythread_cond_signal(&coder->thr->cond);
+ }
+
+ // Enable output from the thread that holds the oldest output
+ // buffer in the output queue (if such a thread exists).
+ mythread_sync(coder->mutex) {
+ lzma_outq_enable_partial_output(&coder->outq,
+ &worker_enable_partial_update);
+ }
+
+ coder->sequence = SEQ_BLOCK_THR_RUN;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_THR_RUN: {
+ if (action == LZMA_FINISH && coder->fail_fast) {
+ // We know that we won't get more input and that
+ // the caller wants fail-fast behavior. If we see
+ // that we don't have enough input to finish this
+ // Block, return LZMA_DATA_ERROR immediately.
+ // See SEQ_BLOCK_HEADER for the error code rationale.
+ const size_t in_avail = in_size - *in_pos;
+ const size_t in_needed = coder->thr->in_size
+ - coder->thr->in_filled;
+ if (in_avail < in_needed) {
+ threads_stop(coder);
+ return LZMA_DATA_ERROR;
+ }
+ }
+
+ // Copy input to the worker thread.
+ size_t cur_in_filled = coder->thr->in_filled;
+ lzma_bufcpy(in, in_pos, in_size, coder->thr->in,
+ &cur_in_filled, coder->thr->in_size);
+
+ // Tell the thread how much we copied.
+ mythread_sync(coder->thr->mutex) {
+ coder->thr->in_filled = cur_in_filled;
+
+ // NOTE: Most of the time we are copying input faster
+ // than the thread can decode so most of the time
+ // calling mythread_cond_signal() is useless but
+ // we cannot make it conditional because thr->in_pos
+ // is updated without a mutex. And the overhead should
+ // be negligible anyway.
+ mythread_cond_signal(&coder->thr->cond);
+ }
+
+ // Read output from the output queue. Just like in
+ // SEQ_BLOCK_HEADER, we wait to fill the output buffer
+ // only if waiting_allowed was set to true in the beginning
+ // of this function (see the comment there).
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, waiting_allowed,
+ &wait_abs, &has_blocked));
+
+ if (coder->pending_error != LZMA_OK) {
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ // Return if the input didn't contain the whole Block.
+ if (coder->thr->in_filled < coder->thr->in_size) {
+ assert(*in_pos == in_size);
+ return LZMA_OK;
+ }
+
+ // The whole Block has been copied to the thread-specific
+ // buffer. Continue from the next Block Header or Index.
+ coder->thr = NULL;
+ coder->sequence = SEQ_BLOCK_HEADER;
+ break;
+ }
+
+ case SEQ_BLOCK_DIRECT_INIT: {
+ // Wait for the threads to finish and for all decoded data
+ // to be copied to the output. That is, wait until the
+ // output queue becomes empty.
+ //
+ // NOTE: No need to check for coder->pending_error as
+ // we aren't consuming any input until the queue is empty
+ // and if there is a pending error, read_output_and_wait()
+ // will eventually return it before the queue is empty.
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, true, &wait_abs, &has_blocked));
+ if (!lzma_outq_is_empty(&coder->outq))
+ return LZMA_OK;
+
+ // Free the cached output buffers.
+ lzma_outq_clear_cache(&coder->outq, allocator);
+
+ // Get rid of the worker threads, including the coder->threads
+ // array.
+ threads_end(coder, allocator);
+
+ // Initialize the Block decoder.
+ const lzma_ret ret = lzma_block_decoder_init(
+ &coder->block_decoder, allocator,
+ &coder->block_options);
+
+ // Free the allocated filter options since they are needed
+ // only to initialize the Block decoder.
+ lzma_filters_free(coder->filters, allocator);
+ coder->block_options.filters = NULL;
+
+ // Check if Block decoder initialization succeeded.
+ if (ret != LZMA_OK)
+ return ret;
+
+ // Make the memory usage visible to _memconfig().
+ coder->mem_direct_mode = coder->mem_next_filters;
+
+ coder->sequence = SEQ_BLOCK_DIRECT_RUN;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_DIRECT_RUN: {
+ const size_t in_old = *in_pos;
+ const size_t out_old = *out_pos;
+ const lzma_ret ret = coder->block_decoder.code(
+ coder->block_decoder.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size,
+ action);
+ coder->progress_in += *in_pos - in_old;
+ coder->progress_out += *out_pos - out_old;
+
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ // Block decoded successfully. Add the new size pair to
+ // the Index hash.
+ return_if_error(lzma_index_hash_append(coder->index_hash,
+ lzma_block_unpadded_size(
+ &coder->block_options),
+ coder->block_options.uncompressed_size));
+
+ coder->sequence = SEQ_BLOCK_HEADER;
+ break;
+ }
+
+ case SEQ_INDEX_WAIT_OUTPUT:
+ // Flush the output from all worker threads so that we can
+ // decode the Index without thinking about threading.
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, true, &wait_abs, &has_blocked));
+
+ if (!lzma_outq_is_empty(&coder->outq))
+ return LZMA_OK;
+
+ coder->sequence = SEQ_INDEX_DECODE;
+
+ // Fall through
+
+ case SEQ_INDEX_DECODE: {
+ // If we don't have any input, don't call
+ // lzma_index_hash_decode() since it would return
+ // LZMA_BUF_ERROR, which we must not do here.
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ // Decode the Index and compare it to the hash calculated
+ // from the sizes of the Blocks (if any).
+ const size_t in_old = *in_pos;
+ const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
+ in, in_pos, in_size);
+ coder->progress_in += *in_pos - in_old;
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ coder->sequence = SEQ_STREAM_FOOTER;
+ }
+
+ // Fall through
+
+ case SEQ_STREAM_FOOTER: {
+ // Copy the Stream Footer to the internal buffer.
+ const size_t in_old = *in_pos;
+ lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+ LZMA_STREAM_HEADER_SIZE);
+ coder->progress_in += *in_pos - in_old;
+
+ // Return if we didn't get the whole Stream Footer yet.
+ if (coder->pos < LZMA_STREAM_HEADER_SIZE)
+ return LZMA_OK;
+
+ coder->pos = 0;
+
+ // Decode the Stream Footer. The decoder gives
+ // LZMA_FORMAT_ERROR if the magic bytes don't match,
+ // so convert that return code to LZMA_DATA_ERROR.
+ lzma_stream_flags footer_flags;
+ const lzma_ret ret = lzma_stream_footer_decode(
+ &footer_flags, coder->buffer);
+ if (ret != LZMA_OK)
+ return ret == LZMA_FORMAT_ERROR
+ ? LZMA_DATA_ERROR : ret;
+
+ // Check that Index Size stored in the Stream Footer matches
+ // the real size of the Index field.
+ if (lzma_index_hash_size(coder->index_hash)
+ != footer_flags.backward_size)
+ return LZMA_DATA_ERROR;
+
+ // Compare that the Stream Flags fields are identical in
+ // both Stream Header and Stream Footer.
+ return_if_error(lzma_stream_flags_compare(
+ &coder->stream_flags, &footer_flags));
+
+ if (!coder->concatenated)
+ return LZMA_STREAM_END;
+
+ coder->sequence = SEQ_STREAM_PADDING;
+ }
+
+ // Fall through
+
+ case SEQ_STREAM_PADDING:
+ assert(coder->concatenated);
+
+ // Skip over possible Stream Padding.
+ while (true) {
+ if (*in_pos >= in_size) {
+ // Unless LZMA_FINISH was used, we cannot
+ // know if there's more input coming later.
+ if (action != LZMA_FINISH)
+ return LZMA_OK;
+
+ // Stream Padding must be a multiple of
+ // four bytes.
+ return coder->pos == 0
+ ? LZMA_STREAM_END
+ : LZMA_DATA_ERROR;
+ }
+
+ // If the byte is not zero, it probably indicates
+ // the beginning of a new Stream (or the file is corrupt).
+ if (in[*in_pos] != 0x00)
+ break;
+
+ ++*in_pos;
+ ++coder->progress_in;
+ coder->pos = (coder->pos + 1) & 3;
+ }
+
+ // Stream Padding must be a multiple of four bytes (empty
+ // Stream Padding is OK).
+ if (coder->pos != 0) {
+ ++*in_pos;
+ ++coder->progress_in;
+ return LZMA_DATA_ERROR;
+ }
+
+ // Prepare to decode the next Stream.
+ return_if_error(stream_decoder_reset(coder, allocator));
+ break;
+
+ case SEQ_ERROR:
+ if (!coder->fail_fast) {
+ // Let the application get all data before the point
+ // where the error was detected. This matches the
+ // behavior of single-threaded use.
+ //
+ // FIXME? Some errors (LZMA_MEM_ERROR) don't get here;
+ // they are returned immediately. Thus in rare cases
+ // the output will be less than in the single-threaded
+ // mode. Maybe this doesn't matter much in practice.
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, true, &wait_abs, &has_blocked));
+
+ // We get here only if the error happened in the main
+ // thread, for example, unsupported Block Header.
+ if (!lzma_outq_is_empty(&coder->outq))
+ return LZMA_OK;
+ }
+
+ // We only get here if no errors were detected by the worker
+ // threads. Errors from worker threads would have already been
+ // returned by the call to read_output_and_wait() above.
+ return coder->pending_error;
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ // Never reached
+}
+
+
+static void
+stream_decoder_mt_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ struct lzma_stream_coder *coder = coder_ptr;
+
+ threads_end(coder, allocator);
+ lzma_outq_end(&coder->outq, allocator);
+
+ lzma_next_end(&coder->block_decoder, allocator);
+ lzma_filters_free(coder->filters, allocator);
+ lzma_index_hash_end(coder->index_hash, allocator);
+
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_check
+stream_decoder_mt_get_check(const void *coder_ptr)
+{
+ const struct lzma_stream_coder *coder = coder_ptr;
+ return coder->stream_flags.check;
+}
+
+
+static lzma_ret
+stream_decoder_mt_memconfig(void *coder_ptr, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+ // NOTE: This function gets/sets memlimit_stop. For now,
+ // memlimit_threading cannot be modified after initialization.
+ //
+ // *memusage will include cached memory too. Excluding cached memory
+ // would be misleading and it wouldn't help the applications to
+ // know how much memory is actually needed to decompress the file
+ // because the higher the number of threads and the memlimits are,
+ // the more memory the decoder may use.
+ //
+ // Setting a new limit includes the cached memory too and too low
+ // limits will be rejected. Alternative could be to free the cached
+ // memory immediately if that helps to bring the limit down but
+ // the current way is the simplest. It's unlikely that the limit
+ // needs to be lowered in the middle of a file anyway; the typical
+ // reason to want a new limit is to increase it after
+ // LZMA_MEMLIMIT_ERROR, and even such use isn't common.
+ struct lzma_stream_coder *coder = coder_ptr;
+
+ mythread_sync(coder->mutex) {
+ *memusage = coder->mem_direct_mode
+ + coder->mem_in_use
+ + coder->mem_cached
+ + coder->outq.mem_allocated;
+ }
+
+ // If no filter chains are allocated, *memusage may be zero.
+ // Always return at least LZMA_MEMUSAGE_BASE.
+ if (*memusage < LZMA_MEMUSAGE_BASE)
+ *memusage = LZMA_MEMUSAGE_BASE;
+
+ *old_memlimit = coder->memlimit_stop;
+
+ if (new_memlimit != 0) {
+ if (new_memlimit < *memusage)
+ return LZMA_MEMLIMIT_ERROR;
+
+ coder->memlimit_stop = new_memlimit;
+ }
+
+ return LZMA_OK;
+}
+
+
+static void
+stream_decoder_mt_get_progress(void *coder_ptr,
+ uint64_t *progress_in, uint64_t *progress_out)
+{
+ struct lzma_stream_coder *coder = coder_ptr;
+
+ // Lock coder->mutex to prevent finishing threads from moving their
+ // progress info from the worker_thread structure to lzma_stream_coder.
+ mythread_sync(coder->mutex) {
+ *progress_in = coder->progress_in;
+ *progress_out = coder->progress_out;
+
+ for (size_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ *progress_in += coder->threads[i].progress_in;
+ *progress_out += coder->threads[i]
+ .progress_out;
+ }
+ }
+ }
+
+ return;
+}
+
+
+static lzma_ret
+stream_decoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_mt *options)
+{
+ struct lzma_stream_coder *coder;
+
+ if (options->threads == 0 || options->threads > LZMA_THREADS_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ if (options->flags & ~LZMA_SUPPORTED_FLAGS)
+ return LZMA_OPTIONS_ERROR;
+
+ lzma_next_coder_init(&stream_decoder_mt_init, next, allocator);
+
+ coder = next->coder;
+ if (!coder) {
+ coder = lzma_alloc(sizeof(struct lzma_stream_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+
+ if (mythread_mutex_init(&coder->mutex)) {
+ lzma_free(coder, allocator);
+ return LZMA_MEM_ERROR;
+ }
+
+ if (mythread_cond_init(&coder->cond)) {
+ mythread_mutex_destroy(&coder->mutex);
+ lzma_free(coder, allocator);
+ return LZMA_MEM_ERROR;
+ }
+
+ next->code = &stream_decode_mt;
+ next->end = &stream_decoder_mt_end;
+ next->get_check = &stream_decoder_mt_get_check;
+ next->memconfig = &stream_decoder_mt_memconfig;
+ next->get_progress = &stream_decoder_mt_get_progress;
+
+ coder->filters[0].id = LZMA_VLI_UNKNOWN;
+ memzero(&coder->outq, sizeof(coder->outq));
+
+ coder->block_decoder = LZMA_NEXT_CODER_INIT;
+ coder->mem_direct_mode = 0;
+
+ coder->index_hash = NULL;
+ coder->threads = NULL;
+ coder->threads_free = NULL;
+ coder->threads_initialized = 0;
+ }
+
+ // Cleanup old filter chain if one remains after unfinished decoding
+ // of a previous Stream.
+ lzma_filters_free(coder->filters, allocator);
+
+ // By allocating threads from scratch we can start memory-usage
+ // accounting from scratch, too. Changes in filter and block sizes
+ // may affect the number of threads.
+ //
+ // FIXME? Reusing should be easy but unlike the single-threaded
+ // decoder, with some types of input file combinations reusing
+ // could leave quite a lot of memory allocated but unused (first
+ // file could allocate a lot, the next files could use fewer
+ // threads and some of the allocations from the first file would not
+ // get freed unless memlimit_threading forces us to clear caches).
+ //
+ // NOTE: The direct mode decoder isn't freed here if one exists.
+ // It will be reused or freed as needed in the main loop.
+ threads_end(coder, allocator);
+
+ // All memusage counters start at 0 (including mem_direct_mode).
+ // The little extra that is needed for the structs in this file
+ // gets accounted well enough by the filter chain memory usage
+ // which adds LZMA_MEMUSAGE_BASE for each chain. However,
+ // stream_decoder_mt_memconfig() has to handle this specially so that
+ // it will never return less than LZMA_MEMUSAGE_BASE as memory usage.
+ coder->mem_in_use = 0;
+ coder->mem_cached = 0;
+ coder->mem_next_block = 0;
+
+ coder->progress_in = 0;
+ coder->progress_out = 0;
+
+ coder->sequence = SEQ_STREAM_HEADER;
+ coder->thread_error = LZMA_OK;
+ coder->pending_error = LZMA_OK;
+ coder->thr = NULL;
+
+ coder->timeout = options->timeout;
+
+ coder->memlimit_threading = my_max(1, options->memlimit_threading);
+ coder->memlimit_stop = my_max(1, options->memlimit_stop);
+ if (coder->memlimit_threading > coder->memlimit_stop)
+ coder->memlimit_threading = coder->memlimit_stop;
+
+ coder->tell_no_check = (options->flags & LZMA_TELL_NO_CHECK) != 0;
+ coder->tell_unsupported_check
+ = (options->flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
+ coder->tell_any_check = (options->flags & LZMA_TELL_ANY_CHECK) != 0;
+ coder->ignore_check = (options->flags & LZMA_IGNORE_CHECK) != 0;
+ coder->concatenated = (options->flags & LZMA_CONCATENATED) != 0;
+ coder->fail_fast = (options->flags & LZMA_FAIL_FAST) != 0;
+
+ coder->first_stream = true;
+ coder->out_was_filled = false;
+ coder->pos = 0;
+
+ coder->threads_max = options->threads;
+
+ return_if_error(lzma_outq_init(&coder->outq, allocator,
+ coder->threads_max));
+
+ return stream_decoder_reset(coder, allocator);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_stream_decoder_mt(lzma_stream *strm, const lzma_mt *options)
+{
+ lzma_next_strm_init(stream_decoder_mt_init, strm, options);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
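The threaded .xz decoder added above is exposed through lzma_stream_decoder_mt(). As a rough orientation, here is a minimal sketch of driving it; it is separate from the patch itself, and the thread count, timeout and memory limits are arbitrary example values (memlimit_threading is the soft limit that triggers the single-threaded direct mode, memlimit_stop is the hard limit that stops with LZMA_MEMLIMIT_ERROR).

#include <stdint.h>
#include <stdio.h>
#include <lzma.h>

// Illustrative sketch only: decompress stdin to stdout with the
// threaded decoder. Error handling is kept to the bare minimum.
static int
decompress_mt(void)
{
	lzma_mt mt = {
		.flags = LZMA_CONCATENATED,       // accept concatenated Streams
		.threads = 4,                     // example value
		.timeout = 300,                   // return at least every 300 ms
		.memlimit_threading = UINT64_C(256) << 20, // soft limit
		.memlimit_stop = UINT64_MAX,      // hard limit: effectively none
	};

	lzma_stream strm = LZMA_STREAM_INIT;
	if (lzma_stream_decoder_mt(&strm, &mt) != LZMA_OK)
		return 1;

	uint8_t inbuf[BUFSIZ], outbuf[BUFSIZ];
	lzma_action action = LZMA_RUN;
	lzma_ret ret = LZMA_OK;

	strm.next_out = outbuf;
	strm.avail_out = sizeof(outbuf);

	while (ret == LZMA_OK) {
		// Refill the input buffer when it has been consumed.
		if (strm.avail_in == 0 && action == LZMA_RUN) {
			strm.next_in = inbuf;
			strm.avail_in = fread(inbuf, 1, sizeof(inbuf), stdin);
			if (feof(stdin))
				action = LZMA_FINISH;
		}

		ret = lzma_code(&strm, action);

		// Flush the output buffer when it is full or at the end.
		if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
			fwrite(outbuf, 1, sizeof(outbuf) - strm.avail_out,
					stdout);
			strm.next_out = outbuf;
			strm.avail_out = sizeof(outbuf);
		}
	}

	lzma_end(&strm);
	return ret == LZMA_STREAM_END ? 0 : 1;
}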
diff --git a/contrib/libs/lzma/liblzma/common/stream_encoder.c b/contrib/libs/lzma/liblzma/common/stream_encoder.c
index 858cba473a..ee92046018 100644
--- a/contrib/libs/lzma/liblzma/common/stream_encoder.c
+++ b/contrib/libs/lzma/liblzma/common/stream_encoder.c
@@ -219,8 +219,7 @@ stream_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
lzma_next_end(&coder->index_encoder, allocator);
lzma_index_end(coder->index, allocator);
- for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
- lzma_free(coder->filters[i].options, allocator);
+ lzma_filters_free(coder->filters, allocator);
lzma_free(coder, allocator);
return;
@@ -233,6 +232,13 @@ stream_encoder_update(void *coder_ptr, const lzma_allocator *allocator,
const lzma_filter *reversed_filters)
{
lzma_stream_coder *coder = coder_ptr;
+ lzma_ret ret;
+
+ // Make a copy to a temporary buffer first. This way it is easier
+ // to keep the encoder state unchanged if an error occurs with
+ // lzma_filters_copy().
+ lzma_filter temp[LZMA_FILTERS_MAX + 1];
+ return_if_error(lzma_filters_copy(filters, temp, allocator));
if (coder->sequence <= SEQ_BLOCK_INIT) {
// There is no incomplete Block waiting to be finished,
@@ -240,31 +246,40 @@ stream_encoder_update(void *coder_ptr, const lzma_allocator *allocator,
// trying to initialize the Block encoder with the new
// chain. This way we detect if the chain is valid.
coder->block_encoder_is_initialized = false;
- coder->block_options.filters = (lzma_filter *)(filters);
- const lzma_ret ret = block_encoder_init(coder, allocator);
+ coder->block_options.filters = temp;
+ ret = block_encoder_init(coder, allocator);
coder->block_options.filters = coder->filters;
if (ret != LZMA_OK)
- return ret;
+ goto error;
coder->block_encoder_is_initialized = true;
} else if (coder->sequence <= SEQ_BLOCK_ENCODE) {
// We are in the middle of a Block. Try to update only
// the filter-specific options.
- return_if_error(coder->block_encoder.update(
+ ret = coder->block_encoder.update(
coder->block_encoder.coder, allocator,
- filters, reversed_filters));
+ filters, reversed_filters);
+ if (ret != LZMA_OK)
+ goto error;
} else {
// Trying to update the filter chain when we are already
// encoding Index or Stream Footer.
- return LZMA_PROG_ERROR;
+ ret = LZMA_PROG_ERROR;
+ goto error;
}
- // Free the copy of the old chain and make a copy of the new chain.
- for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
- lzma_free(coder->filters[i].options, allocator);
+ // Free the options of the old chain.
+ lzma_filters_free(coder->filters, allocator);
+
+ // Copy the new filter chain in place.
+ memcpy(coder->filters, temp, sizeof(temp));
+
+ return LZMA_OK;
- return lzma_filters_copy(filters, coder->filters, allocator);
+error:
+ lzma_filters_free(temp, allocator);
+ return ret;
}
@@ -319,7 +334,7 @@ stream_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
// Initialize the Block encoder. This way we detect unsupported
// filter chains when initializing the Stream encoder instead of
- // giving an error after Stream Header has already written out.
+ // giving an error after the Stream Header has already been written out.
return stream_encoder_update(coder, allocator, filters, NULL);
}
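The temporary-copy dance above is what lzma_filters_update() relies on to keep the encoder state intact when the copy fails. As a sketch (not part of the patch; the helper name and preset handling are illustrative), an application might switch the filter chain at a Block boundary like this:

#include <lzma.h>

// Sketch: replace the encoder's filter chain so that the next Block uses
// a new LZMA2 preset. Typically done right after LZMA_FULL_FLUSH or
// LZMA_FULL_BARRIER so the change applies cleanly from the next Block.
static lzma_ret
switch_preset(lzma_stream *strm, uint32_t new_preset)
{
	lzma_options_lzma opt;
	if (lzma_lzma_preset(&opt, new_preset))
		return LZMA_OPTIONS_ERROR;

	lzma_filter filters[] = {
		{ .id = LZMA_FILTER_LZMA2, .options = &opt },
		{ .id = LZMA_VLI_UNKNOWN, .options = NULL },
	};

	// liblzma copies the chain (lzma_filters_copy() above), so it is
	// fine that opt and filters live on this function's stack.
	return lzma_filters_update(strm, filters);
}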
diff --git a/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c b/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c
index 01e4033975..f4497c10b9 100644
--- a/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c
+++ b/contrib/libs/lzma/liblzma/common/stream_encoder_mt.c
@@ -85,6 +85,11 @@ struct worker_thread_s {
/// Compression options for this Block
lzma_block block_options;
+ /// Filter chain for this thread. By copying the filters array
+ /// to each thread it is possible to change the filter chain
+ /// between Blocks using lzma_filters_update().
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
/// Next structure in the stack of free worker threads.
worker_thread *next;
@@ -109,9 +114,22 @@ struct lzma_stream_coder_s {
/// LZMA_FULL_FLUSH or LZMA_FULL_BARRIER is used earlier.
size_t block_size;
- /// The filter chain currently in use
+ /// The filter chain to use for the next Block.
+ /// This can be updated using lzma_filters_update()
+ /// after LZMA_FULL_BARRIER or LZMA_FULL_FLUSH.
lzma_filter filters[LZMA_FILTERS_MAX + 1];
+ /// A copy of filters[] will be put here when attempting to get
+ /// a new worker thread. This will be copied to a worker thread
+ /// when a thread becomes free and then this cache is marked as
+ /// empty by setting [0].id = LZMA_VLI_UNKNOWN. Without this cache
+ /// the filter options from filters[] would get uselessly copied
+ /// multiple times (allocated and freed) when waiting for a new free
+ /// worker thread.
+ ///
+ /// This is freed if filters[] is updated via lzma_filters_update().
+ lzma_filter filters_cache[LZMA_FILTERS_MAX + 1];
+
/// Index to hold sizes of the Blocks
lzma_index *index;
@@ -133,6 +151,9 @@ struct lzma_stream_coder_s {
/// Output buffer queue for compressed data
lzma_outq outq;
+ /// How much memory to allocate for each lzma_outbuf.buf
+ size_t outbuf_alloc_size;
+
/// Maximum wait time if cannot use all the input and cannot
/// fill the output buffer. This is in milliseconds.
@@ -196,7 +217,7 @@ worker_error(worker_thread *thr, lzma_ret ret)
static worker_state
-worker_encode(worker_thread *thr, worker_state state)
+worker_encode(worker_thread *thr, size_t *out_pos, worker_state state)
{
assert(thr->progress_in == 0);
assert(thr->progress_out == 0);
@@ -205,12 +226,9 @@ worker_encode(worker_thread *thr, worker_state state)
thr->block_options = (lzma_block){
.version = 0,
.check = thr->coder->stream_flags.check,
- .compressed_size = thr->coder->outq.buf_size_max,
+ .compressed_size = thr->outbuf->allocated,
.uncompressed_size = thr->coder->block_size,
-
- // TODO: To allow changing the filter chain, the filters
- // array must be copied to each worker_thread.
- .filters = thr->coder->filters,
+ .filters = thr->filters,
};
// Calculate maximum size of the Block Header. This amount is
@@ -234,12 +252,12 @@ worker_encode(worker_thread *thr, worker_state state)
size_t in_pos = 0;
size_t in_size = 0;
- thr->outbuf->size = thr->block_options.header_size;
- const size_t out_size = thr->coder->outq.buf_size_max;
+ *out_pos = thr->block_options.header_size;
+ const size_t out_size = thr->outbuf->allocated;
do {
mythread_sync(thr->mutex) {
- // Store in_pos and out_pos into *thr so that
+ // Store in_pos and *out_pos into *thr so that
// an application may read them via
// lzma_get_progress() to get progress information.
//
@@ -247,7 +265,7 @@ worker_encode(worker_thread *thr, worker_state state)
// finishes. Instead, the final values are taken
// later from thr->outbuf.
thr->progress_in = in_pos;
- thr->progress_out = thr->outbuf->size;
+ thr->progress_out = *out_pos;
while (in_size == thr->in_size
&& thr->state == THR_RUN)
@@ -277,8 +295,8 @@ worker_encode(worker_thread *thr, worker_state state)
ret = thr->block_encoder.code(
thr->block_encoder.coder, thr->allocator,
thr->in, &in_pos, in_limit, thr->outbuf->buf,
- &thr->outbuf->size, out_size, action);
- } while (ret == LZMA_OK && thr->outbuf->size < out_size);
+ out_pos, out_size, action);
+ } while (ret == LZMA_OK && *out_pos < out_size);
switch (ret) {
case LZMA_STREAM_END:
@@ -313,10 +331,10 @@ worker_encode(worker_thread *thr, worker_state state)
return state;
// Do the encoding. This takes care of the Block Header too.
- thr->outbuf->size = 0;
+ *out_pos = 0;
ret = lzma_block_uncomp_encode(&thr->block_options,
thr->in, in_size, thr->outbuf->buf,
- &thr->outbuf->size, out_size);
+ out_pos, out_size);
// It shouldn't fail.
if (ret != LZMA_OK) {
@@ -367,11 +385,13 @@ worker_start(void *thr_ptr)
}
}
+ size_t out_pos = 0;
+
assert(state != THR_IDLE);
assert(state != THR_STOP);
if (state <= THR_FINISH)
- state = worker_encode(thr, state);
+ state = worker_encode(thr, &out_pos, state);
if (state == THR_EXIT)
break;
@@ -387,14 +407,17 @@ worker_start(void *thr_ptr)
}
mythread_sync(thr->coder->mutex) {
- // Mark the output buffer as finished if
- // no errors occurred.
- thr->outbuf->finished = state == THR_FINISH;
+ // If no errors occurred, make the encoded data
+ // available to be copied out.
+ if (state == THR_FINISH) {
+ thr->outbuf->pos = out_pos;
+ thr->outbuf->finished = true;
+ }
// Update the main progress info.
thr->coder->progress_in
+= thr->outbuf->uncompressed_size;
- thr->coder->progress_out += thr->outbuf->size;
+ thr->coder->progress_out += out_pos;
thr->progress_in = 0;
thr->progress_out = 0;
@@ -407,6 +430,8 @@ worker_start(void *thr_ptr)
}
// Exiting, free the resources.
+ lzma_filters_free(thr->filters, thr->allocator);
+
mythread_mutex_destroy(&thr->mutex);
mythread_cond_destroy(&thr->cond);
@@ -490,6 +515,7 @@ initialize_new_thread(lzma_stream_coder *coder,
thr->progress_in = 0;
thr->progress_out = 0;
thr->block_encoder = LZMA_NEXT_CODER_INIT;
+ thr->filters[0].id = LZMA_VLI_UNKNOWN;
if (mythread_create(&thr->thread_id, &worker_start, thr))
goto error_thread;
@@ -519,6 +545,18 @@ get_thread(lzma_stream_coder *coder, const lzma_allocator *allocator)
if (!lzma_outq_has_buf(&coder->outq))
return LZMA_OK;
+ // That's also true if we cannot allocate memory for the output
+ // buffer in the output queue.
+ return_if_error(lzma_outq_prealloc_buf(&coder->outq, allocator,
+ coder->outbuf_alloc_size));
+
+ // Make a thread-specific copy of the filter chain. Put it in
+ // the cache array first so that if we cannot get a new thread yet,
+ // the allocation is ready when we try again.
+ if (coder->filters_cache[0].id == LZMA_VLI_UNKNOWN)
+ return_if_error(lzma_filters_copy(
+ coder->filters, coder->filters_cache, allocator));
+
// If there is a free structure on the stack, use it.
mythread_sync(coder->mutex) {
if (coder->threads_free != NULL) {
@@ -541,7 +579,16 @@ get_thread(lzma_stream_coder *coder, const lzma_allocator *allocator)
mythread_sync(coder->thr->mutex) {
coder->thr->state = THR_RUN;
coder->thr->in_size = 0;
- coder->thr->outbuf = lzma_outq_get_buf(&coder->outq);
+ coder->thr->outbuf = lzma_outq_get_buf(&coder->outq, NULL);
+
+ // Free the old thread-specific filter options and replace
+ // them with the already-allocated new options from
+ // coder->filters_cache[]. Then mark the cache as empty.
+ lzma_filters_free(coder->thr->filters, allocator);
+ memcpy(coder->thr->filters, coder->filters_cache,
+ sizeof(coder->filters_cache));
+ coder->filters_cache[0].id = LZMA_VLI_UNKNOWN;
+
mythread_cond_signal(&coder->thr->cond);
}
@@ -627,9 +674,13 @@ wait_for_work(lzma_stream_coder *coder, mythread_condtime *wait_abs,
// to true here and calculate the absolute time when
// we must return if there's nothing to do.
//
- // The idea of *has_blocked is to avoid unneeded calls
- // to mythread_condtime_set(), which may do a syscall
- // depending on the operating system.
+ // This way if we block multiple times for short moments
+ // less than "timeout" milliseconds, we will return once
+ // "timeout" amount of time has passed since the *first*
+ // blocking occurred. If the absolute time was calculated
+ // again every time we block, "timeout" would effectively
+ // be meaningless if we never consecutively block longer
+ // than "timeout" ms.
*has_blocked = true;
mythread_condtime_set(wait_abs, &coder->cond, coder->timeout);
}
@@ -704,7 +755,7 @@ stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
}
// Try to read compressed data to out[].
- ret = lzma_outq_read(&coder->outq,
+ ret = lzma_outq_read(&coder->outq, allocator,
out, out_pos, out_size,
&unpadded_size,
&uncompressed_size);
@@ -715,6 +766,10 @@ stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
ret = lzma_index_append(coder->index,
allocator, unpadded_size,
uncompressed_size);
+ if (ret != LZMA_OK) {
+ threads_stop(coder, false);
+ return ret;
+ }
// If we didn't fill the output buffer yet,
// try to read more data. Maybe the next
@@ -724,8 +779,7 @@ stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
}
if (ret != LZMA_OK) {
- // coder->thread_error was set or
- // lzma_index_append() failed.
+ // coder->thread_error was set.
threads_stop(coder, false);
return ret;
}
@@ -846,8 +900,8 @@ stream_encoder_mt_end(void *coder_ptr, const lzma_allocator *allocator)
threads_end(coder, allocator);
lzma_outq_end(&coder->outq, allocator);
- for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
- lzma_free(coder->filters[i].options, allocator);
+ lzma_filters_free(coder->filters, allocator);
+ lzma_filters_free(coder->filters_cache, allocator);
lzma_next_end(&coder->index_encoder, allocator);
lzma_index_end(coder->index, allocator);
@@ -860,6 +914,45 @@ stream_encoder_mt_end(void *coder_ptr, const lzma_allocator *allocator)
}
+static lzma_ret
+stream_encoder_mt_update(void *coder_ptr, const lzma_allocator *allocator,
+ const lzma_filter *filters,
+ const lzma_filter *reversed_filters
+ lzma_attribute((__unused__)))
+{
+ lzma_stream_coder *coder = coder_ptr;
+
+ // Applications shouldn't attempt to change the options when
+ // we are already encoding the Index or Stream Footer.
+ if (coder->sequence > SEQ_BLOCK)
+ return LZMA_PROG_ERROR;
+
+ // For now the threaded encoder doesn't support changing
+ // the options in the middle of a Block.
+ if (coder->thr != NULL)
+ return LZMA_PROG_ERROR;
+
+ // Check if the filter chain seems mostly valid. See the comment
+ // in stream_encoder_mt_init().
+ if (lzma_raw_encoder_memusage(filters) == UINT64_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ // Make a copy to a temporary buffer first. This way the encoder
+ // state stays unchanged if an error occurs in lzma_filters_copy().
+ lzma_filter temp[LZMA_FILTERS_MAX + 1];
+ return_if_error(lzma_filters_copy(filters, temp, allocator));
+
+ // Free the options of the old chain as well as the cache.
+ lzma_filters_free(coder->filters, allocator);
+ lzma_filters_free(coder->filters_cache, allocator);
+
+ // Copy the new filter chain in place.
+ memcpy(coder->filters, temp, sizeof(temp));
+
+ return LZMA_OK;
+}
+
+
/// Options handling for lzma_stream_encoder_mt_init() and
/// lzma_stream_encoder_mt_memusage()
static lzma_ret
@@ -951,14 +1044,16 @@ stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
&block_size, &outbuf_size_max));
#if SIZE_MAX < UINT64_MAX
- if (block_size > SIZE_MAX)
+ if (block_size > SIZE_MAX || outbuf_size_max > SIZE_MAX)
return LZMA_MEM_ERROR;
#endif
// Validate the filter chain so that we can give an error in this
// function instead of delaying it to the first call to lzma_code().
// The memory usage calculation verifies the filter chain as
- // a side effect so we take advantage of that.
+ // a side effect so we take advantage of that. It's not a perfect
+ // check though, as the raw encoder allows LZMA1 too, but such
+ // problems will be caught eventually by the Block Header encoder.
if (lzma_raw_encoder_memusage(filters) == UINT64_MAX)
return LZMA_OPTIONS_ERROR;
@@ -998,9 +1093,10 @@ stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
next->code = &stream_encode_mt;
next->end = &stream_encoder_mt_end;
next->get_progress = &get_progress;
-// next->update = &stream_encoder_mt_update;
+ next->update = &stream_encoder_mt_update;
coder->filters[0].id = LZMA_VLI_UNKNOWN;
+ coder->filters_cache[0].id = LZMA_VLI_UNKNOWN;
coder->index_encoder = LZMA_NEXT_CODER_INIT;
coder->index = NULL;
memzero(&coder->outq, sizeof(coder->outq));
@@ -1012,6 +1108,7 @@ stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
// Basic initializations
coder->sequence = SEQ_STREAM_HEADER;
coder->block_size = (size_t)(block_size);
+ coder->outbuf_alloc_size = (size_t)(outbuf_size_max);
coder->thread_error = LZMA_OK;
coder->thr = NULL;
@@ -1041,15 +1138,16 @@ stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
// Output queue
return_if_error(lzma_outq_init(&coder->outq, allocator,
- outbuf_size_max, options->threads));
+ options->threads));
// Timeout
coder->timeout = options->timeout;
- // Free the old filter chain and copy the new one.
- for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
- lzma_free(coder->filters[i].options, allocator);
+ // Free the old filter chain and the cache.
+ lzma_filters_free(coder->filters, allocator);
+ lzma_filters_free(coder->filters_cache, allocator);
+ // Copy the new filter chain.
return_if_error(lzma_filters_copy(
filters, coder->filters, allocator));
@@ -1075,6 +1173,31 @@ stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
}
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// These are for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+// Actually that patch didn't create lzma_stream_encoder_mt@XZ_5.2.2
+// but it has been added here anyway since someone might misread the
+// RHEL patch and think both @XZ_5.1.2alpha and @XZ_5.2.2 exist.
+LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.1.2alpha",
+ lzma_ret, lzma_stream_encoder_mt_512a)(
+ lzma_stream *strm, const lzma_mt *options)
+ lzma_nothrow lzma_attr_warn_unused_result
+ __attribute__((__alias__("lzma_stream_encoder_mt_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.2.2",
+ lzma_ret, lzma_stream_encoder_mt_522)(
+ lzma_stream *strm, const lzma_mt *options)
+ lzma_nothrow lzma_attr_warn_unused_result
+ __attribute__((__alias__("lzma_stream_encoder_mt_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt@@XZ_5.2",
+ lzma_ret, lzma_stream_encoder_mt_52)(
+ lzma_stream *strm, const lzma_mt *options)
+ lzma_nothrow lzma_attr_warn_unused_result;
+
+#define lzma_stream_encoder_mt lzma_stream_encoder_mt_52
+#endif
extern LZMA_API(lzma_ret)
lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options)
{
@@ -1090,6 +1213,23 @@ lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options)
}
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.1.2alpha",
+ uint64_t, lzma_stream_encoder_mt_memusage_512a)(
+ const lzma_mt *options) lzma_nothrow lzma_attr_pure
+ __attribute__((__alias__("lzma_stream_encoder_mt_memusage_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.2.2",
+ uint64_t, lzma_stream_encoder_mt_memusage_522)(
+ const lzma_mt *options) lzma_nothrow lzma_attr_pure
+ __attribute__((__alias__("lzma_stream_encoder_mt_memusage_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@@XZ_5.2",
+ uint64_t, lzma_stream_encoder_mt_memusage_52)(
+ const lzma_mt *options) lzma_nothrow lzma_attr_pure;
+
+#define lzma_stream_encoder_mt_memusage lzma_stream_encoder_mt_memusage_52
+#endif
// This function name is a monster but it's consistent with the older
// monster names. :-( 31 chars is the max that C99 requires so in that
// sense it's not too long. ;-)
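With next->update now wired to stream_encoder_mt_update(), the filter chain of the threaded encoder can also be changed between Blocks via lzma_filters_update(). For context, a minimal initialization sketch follows (not from the patch; all numbers are example values):

#include <lzma.h>

// Sketch: set up the threaded .xz encoder. Unused lzma_mt members are
// left zero-initialized as required.
static lzma_ret
encoder_mt_init(lzma_stream *strm)
{
	lzma_mt mt = {
		.threads = 4,                  // example value
		.block_size = 0,               // 0 = let liblzma pick a default
		.timeout = 300,                // milliseconds
		.preset = LZMA_PRESET_DEFAULT, // used because .filters is NULL
		.filters = NULL,
		.check = LZMA_CHECK_CRC64,
	};

	return lzma_stream_encoder_mt(strm, &mt);
}

After a successful init, lzma_code() is used normally; calling lzma_filters_update() between Blocks (after LZMA_FULL_FLUSH or LZMA_FULL_BARRIER) feeds the new chain through the filters_cache[] path shown above.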
diff --git a/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c b/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c
index 4e43e359e1..b8d263ba44 100644
--- a/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c
+++ b/contrib/libs/lzma/liblzma/common/stream_flags_decoder.c
@@ -39,8 +39,11 @@ lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in)
const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic),
LZMA_STREAM_FLAGS_SIZE, 0);
if (crc != read32le(in + sizeof(lzma_header_magic)
- + LZMA_STREAM_FLAGS_SIZE))
+ + LZMA_STREAM_FLAGS_SIZE)) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
return LZMA_DATA_ERROR;
+#endif
+ }
// Stream Flags
if (stream_flags_decode(options, in + sizeof(lzma_header_magic)))
@@ -67,8 +70,11 @@ lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in)
// CRC32
const uint32_t crc = lzma_crc32(in + sizeof(uint32_t),
sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0);
- if (crc != read32le(in))
+ if (crc != read32le(in)) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
return LZMA_DATA_ERROR;
+#endif
+ }
// Stream Flags
if (stream_flags_decode(options, in + sizeof(uint32_t) * 2))
diff --git a/contrib/libs/lzma/liblzma/common/string_conversion.c b/contrib/libs/lzma/liblzma/common/string_conversion.c
new file mode 100644
index 0000000000..53fdff2a62
--- /dev/null
+++ b/contrib/libs/lzma/liblzma/common/string_conversion.c
@@ -0,0 +1,1317 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file string_conversion.c
+/// \brief Conversion of strings to filter chain and vice versa
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_common.h"
+
+
+/////////////////////
+// String building //
+/////////////////////
+
+/// How much memory to allocate for strings. For now, no realloc is used
+/// so this needs to be big enough, even though there is of course
+/// still an overflow check.
+///
+/// FIXME? Using a fixed size is wasteful if the application doesn't free
+/// the string fairly quickly but this can be improved later if needed.
+#define STR_ALLOC_SIZE 800
+
+
+typedef struct {
+ char *buf;
+ size_t pos;
+} lzma_str;
+
+
+static lzma_ret
+str_init(lzma_str *str, const lzma_allocator *allocator)
+{
+ str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator);
+ if (str->buf == NULL)
+ return LZMA_MEM_ERROR;
+
+ str->pos = 0;
+ return LZMA_OK;
+}
+
+
+static void
+str_free(lzma_str *str, const lzma_allocator *allocator)
+{
+ lzma_free(str->buf, allocator);
+ return;
+}
+
+
+static bool
+str_is_full(const lzma_str *str)
+{
+ return str->pos == STR_ALLOC_SIZE - 1;
+}
+
+
+static lzma_ret
+str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator)
+{
+ if (str_is_full(str)) {
+ // The preallocated buffer was too small.
+ // This shouldn't happen as STR_ALLOC_SIZE should
+ // be adjusted if new filters are added.
+ lzma_free(str->buf, allocator);
+ *dest = NULL;
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ str->buf[str->pos] = '\0';
+ *dest = str->buf;
+ return LZMA_OK;
+}
+
+
+static void
+str_append_str(lzma_str *str, const char *s)
+{
+ const size_t len = strlen(s);
+ const size_t limit = STR_ALLOC_SIZE - 1 - str->pos;
+ const size_t copy_size = my_min(len, limit);
+
+ memcpy(str->buf + str->pos, s, copy_size);
+ str->pos += copy_size;
+ return;
+}
+
+
+static void
+str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix)
+{
+ if (v == 0) {
+ str_append_str(str, "0");
+ } else {
+ // NOTE: Don't use plain "B" because xz and the parser in this
+ // file don't support it and at a glance it may look like 8
+ // (there cannot be a space before the suffix).
+ static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" };
+
+ size_t suf = 0;
+ if (use_byte_suffix) {
+ while ((v & 1023) == 0
+ && suf < ARRAY_SIZE(suffixes) - 1) {
+ v >>= 10;
+ ++suf;
+ }
+ }
+
+ // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember
+ // that initializing to "" initializes all elements to
+ // zero so '\0'-termination gets handled by this.
+ char buf[16] = "";
+ size_t pos = sizeof(buf) - 1;
+
+ do {
+ buf[--pos] = '0' + (v % 10);
+ v /= 10;
+ } while (v != 0);
+
+ str_append_str(str, buf + pos);
+ str_append_str(str, suffixes[suf]);
+ }
+
+ return;
+}
+
+
+//////////////////////////////////////////////
+// Parsing and stringification declarations //
+//////////////////////////////////////////////
+
+/// Maximum length for filter and option names.
+/// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes
+#define NAME_LEN_MAX 11
+
+
+/// For option_map.flags: Use .u.map to convert the input value
+/// to an integer. Without this flag, .u.range.{min,max} are used
+/// as the allowed range for the integer.
+#define OPTMAP_USE_NAME_VALUE_MAP 0x01
+
+/// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
+/// the stringified output if the value is an exact multiple of these.
+/// This is used e.g. for LZMA1/2 dictionary size.
+#define OPTMAP_USE_BYTE_SUFFIX 0x02
+
+/// For option_map.flags: If the integer value is zero then this option
+/// won't be included in the stringified output. It's used e.g. for
+/// BCJ filter start offset which usually is zero.
+#define OPTMAP_NO_STRFY_ZERO 0x04
+
+/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
+/// it doesn't need to be specified in the initializers as it is
+/// the implicit value.
+enum {
+ OPTMAP_TYPE_UINT32,
+ OPTMAP_TYPE_LZMA_MODE,
+ OPTMAP_TYPE_LZMA_MATCH_FINDER,
+ OPTMAP_TYPE_LZMA_PRESET,
+};
+
+
+/// This is for mapping string values in options to integers.
+/// The last element of an array must have "" as the name.
+/// It's used e.g. for match finder names in LZMA1/2.
+typedef struct {
+ const char name[NAME_LEN_MAX + 1];
+ const uint32_t value;
+} name_value_map;
+
+
+/// Each filter that has options needs an array of option_map structures.
+/// The array doesn't need to be terminated as the functions take the
+/// length of the array as an argument.
+///
+/// When converting a string to filter options structure, option values
+/// will be handled in a few different ways:
+///
+/// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
+/// is handled specially.
+///
+/// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
+/// converted to an integer using the name_value_map pointed by .u.map.
+/// The last element in .u.map must have .name = "" as the terminator.
+///
+/// (3) Otherwise the string is treated as a non-negative unsigned decimal
+/// integer which must be in the range set in .u.range. If .flags has
+/// OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
+///
+/// The integer value from (2) or (3) is then stored to filter_options
+/// at the offset specified in .offset using the type specified in .type
+/// (default is uint32_t).
+///
+/// Stringifying a filter is done by processing a given number of options
+/// in order from the beginning of an option_map array. The integer is
+/// read from filter_options at .offset using the type from .type.
+///
+/// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
+/// option is skipped.
+///
+/// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
+/// to convert the option to a string. If the map doesn't contain a string
+/// for the integer value then "UNKNOWN" is used.
+///
+/// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
+/// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
+/// MiB, or GiB suffix is used if the value is an exact multiple of these.
+/// Plain "B" suffix is never used.
+typedef struct {
+ char name[NAME_LEN_MAX + 1];
+ uint8_t type;
+ uint8_t flags;
+ uint16_t offset;
+
+ union {
+ struct {
+ uint32_t min;
+ uint32_t max;
+ } range;
+
+ const name_value_map *map;
+ } u;
+} option_map;
+
+
+static const char *parse_options(const char **const str, const char *str_end,
+ void *filter_options,
+ const option_map *const optmap, const size_t optmap_size);
+
+
+/////////
+// BCJ //
+/////////
+
+#if defined(HAVE_ENCODER_X86) \
+ || defined(HAVE_DECODER_X86) \
+ || defined(HAVE_ENCODER_ARM) \
+ || defined(HAVE_DECODER_ARM) \
+ || defined(HAVE_ENCODER_ARMTHUMB) \
+ || defined(HAVE_DECODER_ARMTHUMB) \
+ || defined(HAVE_ENCODER_ARM64) \
+ || defined(HAVE_DECODER_ARM64) \
+ || defined(HAVE_ENCODER_POWERPC) \
+ || defined(HAVE_DECODER_POWERPC) \
+ || defined(HAVE_ENCODER_IA64) \
+ || defined(HAVE_DECODER_IA64) \
+ || defined(HAVE_ENCODER_SPARC) \
+ || defined(HAVE_DECODER_SPARC)
+static const option_map bcj_optmap[] = {
+ {
+ .name = "start",
+ .flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX,
+ .offset = offsetof(lzma_options_bcj, start_offset),
+ .u.range.min = 0,
+ .u.range.max = UINT32_MAX,
+ }
+};
+
+
+static const char *
+parse_bcj(const char **const str, const char *str_end, void *filter_options)
+{
+ // filter_options was zeroed on allocation and that is enough
+ // for the default value.
+ return parse_options(str, str_end, filter_options,
+ bcj_optmap, ARRAY_SIZE(bcj_optmap));
+}
+#endif
+
+
+///////////
+// Delta //
+///////////
+
+#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
+static const option_map delta_optmap[] = {
+ {
+ .name = "dist",
+ .offset = offsetof(lzma_options_delta, dist),
+ .u.range.min = LZMA_DELTA_DIST_MIN,
+ .u.range.max = LZMA_DELTA_DIST_MAX,
+ }
+};
+
+
+static const char *
+parse_delta(const char **const str, const char *str_end, void *filter_options)
+{
+ lzma_options_delta *opts = filter_options;
+ opts->type = LZMA_DELTA_TYPE_BYTE;
+ opts->dist = LZMA_DELTA_DIST_MIN;
+
+ return parse_options(str, str_end, filter_options,
+ delta_optmap, ARRAY_SIZE(delta_optmap));
+}
+#endif
+
+
+///////////////////
+// LZMA1 & LZMA2 //
+///////////////////
+
+/// Help string for presets
+#define LZMA12_PRESET_STR "0-9[e]"
+
+
+static const char *
+parse_lzma12_preset(const char **const str, const char *str_end,
+ uint32_t *preset)
+{
+ assert(*str < str_end);
+ *preset = (uint32_t)(**str - '0');
+
+ // NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
+ while (++*str < str_end) {
+ switch (**str) {
+ case 'e':
+ *preset |= LZMA_PRESET_EXTREME;
+ break;
+
+ default:
+ return "Unsupported preset flag";
+ }
+ }
+
+ return NULL;
+}
+
+
+static const char *
+set_lzma12_preset(const char **const str, const char *str_end,
+ void *filter_options)
+{
+ uint32_t preset;
+ const char *errmsg = parse_lzma12_preset(str, str_end, &preset);
+ if (errmsg != NULL)
+ return errmsg;
+
+ lzma_options_lzma *opts = filter_options;
+ if (lzma_lzma_preset(opts, preset))
+ return "Unsupported preset";
+
+ return NULL;
+}
+
+
+static const name_value_map lzma12_mode_map[] = {
+ { "fast", LZMA_MODE_FAST },
+ { "normal", LZMA_MODE_NORMAL },
+ { "", 0 }
+};
+
+
+static const name_value_map lzma12_mf_map[] = {
+ { "hc3", LZMA_MF_HC3 },
+ { "hc4", LZMA_MF_HC4 },
+ { "bt2", LZMA_MF_BT2 },
+ { "bt3", LZMA_MF_BT3 },
+ { "bt4", LZMA_MF_BT4 },
+ { "", 0 }
+};
+
+
+static const option_map lzma12_optmap[] = {
+ {
+ .name = "preset",
+ .type = OPTMAP_TYPE_LZMA_PRESET,
+ }, {
+ .name = "dict",
+ .flags = OPTMAP_USE_BYTE_SUFFIX,
+ .offset = offsetof(lzma_options_lzma, dict_size),
+ .u.range.min = LZMA_DICT_SIZE_MIN,
+ // FIXME? The max is really the max for encoding but
+ // decoding would allow 4 GiB - 1 B.
+ .u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
+ }, {
+ .name = "lc",
+ .offset = offsetof(lzma_options_lzma, lc),
+ .u.range.min = LZMA_LCLP_MIN,
+ .u.range.max = LZMA_LCLP_MAX,
+ }, {
+ .name = "lp",
+ .offset = offsetof(lzma_options_lzma, lp),
+ .u.range.min = LZMA_LCLP_MIN,
+ .u.range.max = LZMA_LCLP_MAX,
+ }, {
+ .name = "pb",
+ .offset = offsetof(lzma_options_lzma, pb),
+ .u.range.min = LZMA_PB_MIN,
+ .u.range.max = LZMA_PB_MAX,
+ }, {
+ .name = "mode",
+ .type = OPTMAP_TYPE_LZMA_MODE,
+ .flags = OPTMAP_USE_NAME_VALUE_MAP,
+ .offset = offsetof(lzma_options_lzma, mode),
+ .u.map = lzma12_mode_map,
+ }, {
+ .name = "nice",
+ .offset = offsetof(lzma_options_lzma, nice_len),
+ .u.range.min = 2,
+ .u.range.max = 273,
+ }, {
+ .name = "mf",
+ .type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
+ .flags = OPTMAP_USE_NAME_VALUE_MAP,
+ .offset = offsetof(lzma_options_lzma, mf),
+ .u.map = lzma12_mf_map,
+ }, {
+ .name = "depth",
+ .offset = offsetof(lzma_options_lzma, depth),
+ .u.range.min = 0,
+ .u.range.max = UINT32_MAX,
+ }
+};
+
+
+static const char *
+parse_lzma12(const char **const str, const char *str_end, void *filter_options)
+{
+ lzma_options_lzma *opts = filter_options;
+
+ // It cannot fail.
+ const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT);
+ assert(!preset_ret);
+ (void)preset_ret;
+
+ const char *errmsg = parse_options(str, str_end, filter_options,
+ lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
+ if (errmsg != NULL)
+ return errmsg;
+
+ if (opts->lc + opts->lp > LZMA_LCLP_MAX)
+ return "The sum of lc and lp must not exceed 4";
+
+ return NULL;
+}
+
+
+/////////////////////////////////////////
+// Generic parsing and stringification //
+/////////////////////////////////////////
+
+static const struct {
+ /// Name of the filter
+ char name[NAME_LEN_MAX + 1];
+
+ /// For lzma_str_to_filters:
+ /// Size of the filter-specific options structure.
+ uint32_t opts_size;
+
+ /// Filter ID
+ lzma_vli id;
+
+ /// For lzma_str_to_filters:
+ /// Function to parse the filter-specific options. The filter_options
+ /// will already have been allocated using lzma_alloc_zero().
+ const char *(*parse)(const char **str, const char *str_end,
+ void *filter_options);
+
+ /// For lzma_str_from_filters:
+ /// If the flag LZMA_STR_ENCODER is used then the first
+ /// strfy_encoder elements of optmap are stringified.
+ /// With LZMA_STR_DECODER strfy_decoder is used.
+ /// Currently encoders use all flags that decoders do but if
+ /// that changes then this needs to be changed too, for example,
+ /// add a new OPTMAP flag to skip printing some decoder-only flags.
+ const option_map *optmap;
+ uint8_t strfy_encoder;
+ uint8_t strfy_decoder;
+
+ /// For lzma_str_from_filters:
+ /// If true, lzma_filter.options is allowed to be NULL. In that case,
+ /// only the filter name is printed without any options.
+ bool allow_null;
+
+} filter_name_map[] = {
+#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
+ { "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1,
+ &parse_lzma12, lzma12_optmap, 9, 5, false },
+#endif
+
+#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
+ { "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2,
+ &parse_lzma12, lzma12_optmap, 9, 2, false },
+#endif
+
+#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
+ { "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
+ { "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
+ { "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
+ { "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
+ { "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
+ { "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
+ { "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
+ { "delta", sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
+ &parse_delta, delta_optmap, 1, 1, false },
+#endif
+};
+
+
+/// Decodes options from a string for one filter (name1=value1,name2=value2).
+/// Caller must have allocated memory for filter_options already and set
+/// the initial default values. This is called from the filter-specific
+/// parse_* functions.
+///
+/// The input string starts at *str, and str_end points to the first
+/// char that is no longer part of the string, so no '\0' terminator is
+/// used. *str is advanced every time something has been decoded
+/// successfully.
+static const char *
+parse_options(const char **const str, const char *str_end,
+ void *filter_options,
+ const option_map *const optmap, const size_t optmap_size)
+{
+ while (*str < str_end && **str != '\0') {
+ // Each option is of the form name=value.
+ // Commas (',') separate options. Extra commas are ignored.
+ // Ignoring extra commas makes it simpler when an optional
+ // option is stored in a shell variable that can be empty.
+ if (**str == ',') {
+ ++*str;
+ continue;
+ }
+
+ // Find where the next name=value ends.
+ const size_t str_len = (size_t)(str_end - *str);
+ const char *name_eq_value_end = memchr(*str, ',', str_len);
+ if (name_eq_value_end == NULL)
+ name_eq_value_end = str_end;
+
+ const char *equals_sign = memchr(*str, '=',
+ (size_t)(name_eq_value_end - *str));
+
+ // Fail if the '=' wasn't found or the option name is missing
+ // (the first char is '=').
+ if (equals_sign == NULL || **str == '=')
+ return "Options must be 'name=value' pairs separated "
+ "with commas";
+
+ // Reject a too long option name so that the memcmp()
+ // in the loop below won't read past the end of the
+ // string in optmap[i].name.
+ const size_t name_len = (size_t)(equals_sign - *str);
+ if (name_len > NAME_LEN_MAX)
+ return "Unknown option name";
+
+ // Find the option name from optmap[].
+ size_t i = 0;
+ while (true) {
+ if (i == optmap_size)
+ return "Unknown option name";
+
+ if (memcmp(*str, optmap[i].name, name_len) == 0
+ && optmap[i].name[name_len] == '\0')
+ break;
+
+ ++i;
+ }
+
+ // The input string is good at least until the start of
+ // the option value.
+ *str = equals_sign + 1;
+
+ // The code assumes that the option value isn't an empty
+ // string so check it here.
+ const size_t value_len = (size_t)(name_eq_value_end - *str);
+ if (value_len == 0)
+ return "Option value cannot be empty";
+
+ // LZMA1/2 preset has its own parsing function.
+ if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
+ const char *errmsg = set_lzma12_preset(str,
+ name_eq_value_end, filter_options);
+ if (errmsg != NULL)
+ return errmsg;
+
+ continue;
+ }
+
+ // It's an integer value.
+ uint32_t v;
+ if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
+ // The integer is picked from a string-to-integer map.
+ //
+ // Reject a too long value string so that the memcmp()
+ // in the loop below won't read past the end of the
+ // string in optmap[i].u.map[j].name.
+ if (value_len > NAME_LEN_MAX)
+ return "Invalid option value";
+
+ const name_value_map *map = optmap[i].u.map;
+ size_t j = 0;
+ while (true) {
+ // The array is terminated with an empty name.
+ if (map[j].name[0] == '\0')
+ return "Invalid option value";
+
+ if (memcmp(*str, map[j].name, value_len) == 0
+ && map[j].name[value_len]
+ == '\0') {
+ v = map[j].value;
+ break;
+ }
+
+ ++j;
+ }
+ } else if (**str < '0' || **str > '9') {
+ // Note that "max" isn't supported while it is
+ // supported in xz. It's not useful here.
+ return "Value is not a non-negative decimal integer";
+ } else {
+ // strtoul() has locale-specific behavior so it cannot
+ // be relied on to get reproducible results since we
+ // cannot change the locale in a thread-safe library.
+ // It also needs '\0'-termination.
+ //
+ // Use a temporary pointer so that *str will point
+ // to the beginning of the value string in case
+ // an error occurs.
+ const char *p = *str;
+ v = 0;
+ do {
+ if (v > UINT32_MAX / 10)
+ return "Value out of range";
+
+ v *= 10;
+
+ const uint32_t add = (uint32_t)(*p - '0');
+ if (UINT32_MAX - add < v)
+ return "Value out of range";
+
+ v += add;
+ ++p;
+ } while (p < name_eq_value_end
+ && *p >= '0' && *p <= '9');
+
+ if (p < name_eq_value_end) {
+ // Remember this position so that it can be
+ // used for error messages that are
+ // specifically about the suffix. (Out of
+ // range values are about the whole value
+ // and those error messages point to the
+ // beginning of the number part,
+ // not to the suffix.)
+ const char *multiplier_start = p;
+
+ // If multiplier suffixes shouldn't be used
+ // then don't allow them even if the value
+ // would stay within limits. This is a somewhat
+ // unnecessary check but it rejects silly
+ // things like lzma2:pb=0MiB which xz allows.
+ if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
+ == 0) {
+ *str = multiplier_start;
+ return "This option does not support "
+ "any integer suffixes";
+ }
+
+ uint32_t shift;
+
+ switch (*p) {
+ case 'k':
+ case 'K':
+ shift = 10;
+ break;
+
+ case 'm':
+ case 'M':
+ shift = 20;
+ break;
+
+ case 'g':
+ case 'G':
+ shift = 30;
+ break;
+
+ default:
+ *str = multiplier_start;
+ return "Invalid multiplier suffix "
+ "(KiB, MiB, or GiB)";
+ }
+
+ ++p;
+
+ // Allow "M", "Mi", "MB", "MiB" and the same
+ // for the other five characters from the
+ // switch-statement above. All are handled
+ // as base-2 (perhaps a mistake, perhaps not).
+ // Note that 'i' and 'B' are case sensitive.
+ if (p < name_eq_value_end && *p == 'i')
+ ++p;
+
+ if (p < name_eq_value_end && *p == 'B')
+ ++p;
+
+ // Now we must have no chars remaining.
+ if (p < name_eq_value_end) {
+ *str = multiplier_start;
+ return "Invalid multiplier suffix "
+ "(KiB, MiB, or GiB)";
+ }
+
+ if (v > (UINT32_MAX >> shift))
+ return "Value out of range";
+
+ v <<= shift;
+ }
+
+ if (v < optmap[i].u.range.min
+ || v > optmap[i].u.range.max)
+ return "Value out of range";
+ }
+
+ // Set the value in filter_options. Enums are handled
+ // specially since the underlying type isn't the same
+ // as uint32_t on all systems.
+ void *ptr = (char *)filter_options + optmap[i].offset;
+ switch (optmap[i].type) {
+ case OPTMAP_TYPE_LZMA_MODE:
+ *(lzma_mode *)ptr = (lzma_mode)v;
+ break;
+
+ case OPTMAP_TYPE_LZMA_MATCH_FINDER:
+ *(lzma_match_finder *)ptr = (lzma_match_finder)v;
+ break;
+
+ default:
+ *(uint32_t *)ptr = v;
+ break;
+ }
+
+ // This option has been successfully handled.
+ *str = name_eq_value_end;
+ }
+
+ // No errors.
+ return NULL;
+}
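The integer parsing above accepts an optional multiplier suffix: k/K, m/M and g/G select shifts of 10, 20 and 30 bits, an optional 'i' and/or 'B' may follow, and any value that would overflow uint32_t after shifting is rejected. A minimal standalone sketch of that scaling (illustrative names only, not liblzma code):

/* Sketch of the suffix scaling described above; names are illustrative. */
#include <stdint.h>
#include <stdio.h>

static int
scale_suffix(uint32_t value, char suffix, uint32_t *result)
{
	uint32_t shift;

	switch (suffix) {
	case 'k': case 'K': shift = 10; break;
	case 'm': case 'M': shift = 20; break;
	case 'g': case 'G': shift = 30; break;
	default: return -1;	/* unknown suffix */
	}

	/* Same overflow guard as in parse_options(). */
	if (value > (UINT32_MAX >> shift))
		return -1;

	*result = value << shift;
	return 0;
}

int
main(void)
{
	uint32_t dict;
	if (scale_suffix(64, 'M', &dict) == 0)
		printf("64MiB -> %u bytes\n", dict);	/* 67108864 */
	return 0;
}

So "lzma2:dict=64MiB" ends up as dict_size = 67108864, while "lzma2:pb=0MiB" is rejected because the pb option doesn't take suffixes.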
+
+
+/// Finds the name of the filter at the beginning of the string and
+/// calls filter_name_map[i].parse() to decode the filter-specific options.
+/// The caller must have set str_end so that exactly one filter and its
+/// options are present without any trailing characters.
+static const char *
+parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
+ const lzma_allocator *allocator, bool only_xz)
+{
+ // Search for a colon or equals sign that would separate the filter
+ // name from filter options. If neither is found, then the input
+ // string only contains a filter name and there are no options.
+ //
+ // First assume that a colon or equals sign won't be found:
+ const char *name_end = str_end;
+ const char *opts_start = str_end;
+
+ for (const char *p = *str; p < str_end; ++p) {
+ if (*p == ':' || *p == '=') {
+ name_end = p;
+
+ // Filter options (name1=value1,name2=value2,...)
+ // begin after the colon or equals sign.
+ opts_start = p + 1;
+ break;
+ }
+ }
+
+ // Reject a too long filter name so that the memcmp()
+ // in the loop below won't read past the end of the
+ // string in filter_name_map[i].name.
+ const size_t name_len = (size_t)(name_end - *str);
+ if (name_len > NAME_LEN_MAX)
+ return "Unknown filter name";
+
+ for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
+ if (memcmp(*str, filter_name_map[i].name, name_len) == 0
+ && filter_name_map[i].name[name_len] == '\0') {
+ if (only_xz && filter_name_map[i].id
+ >= LZMA_FILTER_RESERVED_START)
+ return "This filter cannot be used in "
+ "the .xz format";
+
+ // Allocate the filter-specific options and
+ // initialize the memory with zeros.
+ void *options = lzma_alloc_zero(
+ filter_name_map[i].opts_size,
+ allocator);
+ if (options == NULL)
+ return "Memory allocation failed";
+
+ // Filter name was found so the input string is good
+ // at least this far.
+ *str = opts_start;
+
+ const char *errmsg = filter_name_map[i].parse(
+ str, str_end, options);
+ if (errmsg != NULL) {
+ lzma_free(options, allocator);
+ return errmsg;
+ }
+
+ // *filter is modified only when parsing is successful.
+ filter->id = filter_name_map[i].id;
+ filter->options = options;
+ return NULL;
+ }
+ }
+
+ return "Unknown filter name";
+}
+
+
+/// Converts the string to a filter chain (array of lzma_filter structures).
+///
+/// *str is advanced every time something has been decoded successfully.
+/// This way the caller knows where in the string a possible error occurred.
+static const char *
+str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
+ const lzma_allocator *allocator)
+{
+ const char *errmsg;
+
+ // Skip leading spaces.
+ while (**str == ' ')
+ ++*str;
+
+ if (**str == '\0')
+ return "Empty string is not allowed, "
+ "try \"6\" if a default value is needed";
+
+ // Detect the type of the string.
+ //
+ // A string that begins with a digit, or with one dash followed
+ // by a digit, is treated as a preset. Trailing spaces are
+ // ignored too (leading spaces were already ignored above).
+ //
+ // For example, "6", "7 ", "-9e", and " -3 " are treated as presets.
+ // Strings like "-" or "- " aren't presets.
+#define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
+ if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
+ if (**str == '-')
+ ++*str;
+
+ // Ignore trailing spaces.
+ const size_t str_len = strlen(*str);
+ const char *str_end = memchr(*str, ' ', str_len);
+ if (str_end != NULL) {
+ // There is at least one trailing space. Check that
+ // there are no chars other than spaces.
+ for (size_t i = 1; str_end[i] != '\0'; ++i)
+ if (str_end[i] != ' ')
+ return "Unsupported preset";
+ } else {
+ // There are no trailing spaces. Use the whole string.
+ str_end = *str + str_len;
+ }
+
+ uint32_t preset;
+ errmsg = parse_lzma12_preset(str, str_end, &preset);
+ if (errmsg != NULL)
+ return errmsg;
+
+ lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
+ if (opts == NULL)
+ return "Memory allocation failed";
+
+ if (lzma_lzma_preset(opts, preset)) {
+ lzma_free(opts, allocator);
+ return "Unsupported preset";
+ }
+
+ filters[0].id = LZMA_FILTER_LZMA2;
+ filters[0].options = opts;
+ filters[1].id = LZMA_VLI_UNKNOWN;
+ filters[1].options = NULL;
+
+ return NULL;
+ }
+
+ // Not a preset so it must be a filter chain.
+ //
+ // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
+ // can be used in .xz.
+ const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;
+
+ // Use a temporary array so that we don't modify the caller-supplied
+ // one until we know that no errors occurred.
+ lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];
+
+ size_t i = 0;
+ do {
+ if (i == LZMA_FILTERS_MAX) {
+ errmsg = "The maximum number of filters is four";
+ goto error;
+ }
+
+ // Skip "--" if present.
+ if ((*str)[0] == '-' && (*str)[1] == '-')
+ *str += 2;
+
+ // Locate the end of "filter:name1=value1,name2=value2",
+ // stopping at the first "--" or a single space.
+ const char *filter_end = *str;
+ while (filter_end[0] != '\0') {
+ if ((filter_end[0] == '-' && filter_end[1] == '-')
+ || filter_end[0] == ' ')
+ break;
+
+ ++filter_end;
+ }
+
+ // Inputs that have "--" at the end or "-- " in the middle
+ // will result in an empty filter name.
+ if (filter_end == *str) {
+ errmsg = "Filter name is missing";
+ goto error;
+ }
+
+ errmsg = parse_filter(str, filter_end, &temp_filters[i],
+ allocator, only_xz);
+ if (errmsg != NULL)
+ goto error;
+
+ // Skip trailing spaces.
+ while (**str == ' ')
+ ++*str;
+
+ ++i;
+ } while (**str != '\0');
+
+ // Seems to be good, terminate the array so that
+ // basic validation can be done.
+ temp_filters[i].id = LZMA_VLI_UNKNOWN;
+ temp_filters[i].options = NULL;
+
+ // Do basic validation if the application didn't prohibit it.
+ if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
+ size_t dummy;
+ const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
+ assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
+ if (ret != LZMA_OK) {
+ errmsg = "Invalid filter chain "
+ "('lzma2' missing at the end?)";
+ goto error;
+ }
+ }
+
+ // All good. Copy the filters to the application supplied array.
+ memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
+ return NULL;
+
+error:
+ // Free the filter options that were successfully decoded.
+ while (i-- > 0)
+ lzma_free(temp_filters[i].options, allocator);
+
+ return errmsg;
+}
+
+
+extern LZMA_API(const char *)
+lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
+ uint32_t flags, const lzma_allocator *allocator)
+{
+ if (str == NULL || filters == NULL)
+ return "Unexpected NULL pointer argument(s) "
+ "to lzma_str_to_filters()";
+
+ // Validate the flags.
+ const uint32_t supported_flags
+ = LZMA_STR_ALL_FILTERS
+ | LZMA_STR_NO_VALIDATION;
+
+ if (flags & ~supported_flags)
+ return "Unsupported flags to lzma_str_to_filters()";
+
+ const char *used = str;
+ const char *errmsg = str_to_filters(&used, filters, flags, allocator);
+
+ if (error_pos != NULL) {
+ const size_t n = (size_t)(used - str);
+ *error_pos = n > INT_MAX ? INT_MAX : (int)n;
+ }
+
+ return errmsg;
+}
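As a usage sketch (not part of the patch), an application could turn a filter string into a chain like this; the filter string and flags below are only examples of what the parser above accepts:

#include <lzma.h>
#include <stdio.h>

int
main(void)
{
	lzma_filter filters[LZMA_FILTERS_MAX + 1];
	int error_pos;

	const char *msg = lzma_str_to_filters(
			"delta:dist=4 lzma2:preset=6e,dict=64MiB",
			&error_pos, filters, 0, NULL);

	if (msg != NULL) {
		/* error_pos tells how far parsing got in the string. */
		fprintf(stderr, "error at position %d: %s\n",
				error_pos, msg);
		return 1;
	}

	/* ... use the chain, e.g. with lzma_stream_encoder() ... */

	lzma_filters_free(filters, NULL);
	return 0;
}

On error the returned message is a static string (all returns above are string literals), so nothing needs to be freed in that path.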
+
+
+/// Converts options of one filter to a string.
+///
+/// The caller must have already put the filter name in the destination
+/// string. Since it is possible that no options will be needed, the caller
+/// won't have put a delimiter character (':' or '=') in the string yet.
+/// We will add it if at least one option will be added to the string.
+static void
+strfy_filter(lzma_str *dest, const char *delimiter,
+ const option_map *optmap, size_t optmap_count,
+ const void *filter_options)
+{
+ for (size_t i = 0; i < optmap_count; ++i) {
+ // No attempt is made to reverse LZMA1/2 preset.
+ if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET)
+ continue;
+
+ // All options have integer values; some are just mapped
+ // to a string with a name_value_map. The LZMA1/2 preset
+ // isn't reversed back to preset=PRESET form.
+ uint32_t v;
+ const void *ptr
+ = (const char *)filter_options + optmap[i].offset;
+ switch (optmap[i].type) {
+ case OPTMAP_TYPE_LZMA_MODE:
+ v = *(const lzma_mode *)ptr;
+ break;
+
+ case OPTMAP_TYPE_LZMA_MATCH_FINDER:
+ v = *(const lzma_match_finder *)ptr;
+ break;
+
+ default:
+ v = *(const uint32_t *)ptr;
+ break;
+ }
+
+ // Skip this if this option should be omitted from
+ // the string when the value is zero.
+ if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO))
+ continue;
+
+ // Before the first option we add whatever delimiter
+ // the caller gave us. For later options a comma is used.
+ str_append_str(dest, delimiter);
+ delimiter = ",";
+
+ // Add the option name and equals sign.
+ str_append_str(dest, optmap[i].name);
+ str_append_str(dest, "=");
+
+ if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
+ const name_value_map *map = optmap[i].u.map;
+ size_t j = 0;
+ while (true) {
+ if (map[j].name[0] == '\0') {
+ str_append_str(dest, "UNKNOWN");
+ break;
+ }
+
+ if (map[j].value == v) {
+ str_append_str(dest, map[j].name);
+ break;
+ }
+
+ ++j;
+ }
+ } else {
+ str_append_u32(dest, v,
+ optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX);
+ }
+ }
+
+ return;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_str_from_filters(char **output_str, const lzma_filter *filters,
+ uint32_t flags, const lzma_allocator *allocator)
+{
+ // On error *output_str is always set to NULL.
+ // Do it as the very first step.
+ if (output_str == NULL)
+ return LZMA_PROG_ERROR;
+
+ *output_str = NULL;
+
+ if (filters == NULL)
+ return LZMA_PROG_ERROR;
+
+ // Validate the flags.
+ const uint32_t supported_flags
+ = LZMA_STR_ENCODER
+ | LZMA_STR_DECODER
+ | LZMA_STR_GETOPT_LONG
+ | LZMA_STR_NO_SPACES;
+
+ if (flags & ~supported_flags)
+ return LZMA_OPTIONS_ERROR;
+
+ // There must be at least one filter.
+ if (filters[0].id == LZMA_VLI_UNKNOWN)
+ return LZMA_OPTIONS_ERROR;
+
+ // Allocate memory for the output string.
+ lzma_str dest;
+ return_if_error(str_init(&dest, allocator));
+
+ const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
+
+ const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
+
+ for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
+ // Don't add a space between filters if the caller
+ // doesn't want them.
+ if (i > 0 && !(flags & LZMA_STR_NO_SPACES))
+ str_append_str(&dest, " ");
+
+ // Use dashes for xz getopt_long() compatible syntax but also
+ // use dashes to separate filters when spaces weren't wanted.
+ if ((flags & LZMA_STR_GETOPT_LONG)
+ || (i > 0 && (flags & LZMA_STR_NO_SPACES)))
+ str_append_str(&dest, "--");
+
+ size_t j = 0;
+ while (true) {
+ if (j == ARRAY_SIZE(filter_name_map)) {
+ // Filter ID in filters[i].id isn't supported.
+ str_free(&dest, allocator);
+ return LZMA_OPTIONS_ERROR;
+ }
+
+ if (filter_name_map[j].id == filters[i].id) {
+ // Add the filter name.
+ str_append_str(&dest, filter_name_map[j].name);
+
+ // If only the filter names were wanted then
+ // skip to the next filter. In this case
+ // .options is ignored and may be NULL even
+ // when the filter doesn't allow NULL options.
+ if (!show_opts)
+ break;
+
+ if (filters[i].options == NULL) {
+ if (!filter_name_map[j].allow_null) {
+ // Filter-specific options
+ // are missing but with
+ // this filter the options
+ // structure is mandatory.
+ str_free(&dest, allocator);
+ return LZMA_OPTIONS_ERROR;
+ }
+
+ // .options is allowed to be NULL.
+ // There is no need to add any
+ // options to the string.
+ break;
+ }
+
+ // Options structure is available. Add
+ // the filter options to the string.
+ const size_t optmap_count
+ = (flags & LZMA_STR_ENCODER)
+ ? filter_name_map[j].strfy_encoder
+ : filter_name_map[j].strfy_decoder;
+ strfy_filter(&dest, opt_delim,
+ filter_name_map[j].optmap,
+ optmap_count,
+ filters[i].options);
+ break;
+ }
+
+ ++j;
+ }
+ }
+
+ return str_finish(output_str, &dest, allocator);
+}
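A round-trip sketch showing how the stringification is meant to be used (the printed output is only indicative; the exact option order follows the optmap tables above):

#include <lzma.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	lzma_filter filters[LZMA_FILTERS_MAX + 1];

	/* "6" expands to an LZMA2 preset as handled in str_to_filters(). */
	if (lzma_str_to_filters("6", NULL, filters, 0, NULL) != NULL)
		return 1;

	char *str = NULL;
	if (lzma_str_from_filters(&str, filters,
			LZMA_STR_ENCODER, NULL) == LZMA_OK) {
		puts(str);	/* e.g. "lzma2:dict=8MiB,lc=3,..." */
		free(str);	/* assuming the default malloc-based allocator */
	}

	lzma_filters_free(filters, NULL);
	return 0;
}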
+
+
+extern LZMA_API(lzma_ret)
+lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags,
+ const lzma_allocator *allocator)
+{
+ // On error *output_str is always set to NULL.
+ // Do it as the very first step.
+ if (output_str == NULL)
+ return LZMA_PROG_ERROR;
+
+ *output_str = NULL;
+
+ // Validate the flags.
+ const uint32_t supported_flags
+ = LZMA_STR_ALL_FILTERS
+ | LZMA_STR_ENCODER
+ | LZMA_STR_DECODER
+ | LZMA_STR_GETOPT_LONG;
+
+ if (flags & ~supported_flags)
+ return LZMA_OPTIONS_ERROR;
+
+ // Allocate memory for the output string.
+ lzma_str dest;
+ return_if_error(str_init(&dest, allocator));
+
+ // If only listing the filter names then separate them with spaces.
+ // Otherwise use newlines.
+ const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
+ const char *filter_delim = show_opts ? "\n" : " ";
+
+ const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
+ bool first_filter_printed = false;
+
+ for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
+ // If we are printing only one filter then skip others.
+ if (filter_id != LZMA_VLI_UNKNOWN
+ && filter_id != filter_name_map[i].id)
+ continue;
+
+ // If we are printing only .xz filters then skip the others.
+ if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START
+ && (flags & LZMA_STR_ALL_FILTERS) == 0
+ && filter_id == LZMA_VLI_UNKNOWN)
+ continue;
+
+ // Add a new line if this isn't the first filter being
+ // written to the string.
+ if (first_filter_printed)
+ str_append_str(&dest, filter_delim);
+
+ first_filter_printed = true;
+
+ if (flags & LZMA_STR_GETOPT_LONG)
+ str_append_str(&dest, "--");
+
+ str_append_str(&dest, filter_name_map[i].name);
+
+ // If only the filter names were wanted then continue
+ // to the next filter.
+ if (!show_opts)
+ continue;
+
+ const option_map *optmap = filter_name_map[i].optmap;
+ const char *d = opt_delim;
+
+ const size_t end = (flags & LZMA_STR_ENCODER)
+ ? filter_name_map[i].strfy_encoder
+ : filter_name_map[i].strfy_decoder;
+
+ for (size_t j = 0; j < end; ++j) {
+ // The first option is delimited from the filter
+ // name using "=" or ":" and the rest of the options
+ // are separated with ",".
+ str_append_str(&dest, d);
+ d = ",";
+
+ // optname=<possible_values>
+ str_append_str(&dest, optmap[j].name);
+ str_append_str(&dest, "=<");
+
+ if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) {
+ // LZMA1/2 preset has its custom help string.
+ str_append_str(&dest, LZMA12_PRESET_STR);
+ } else if (optmap[j].flags
+ & OPTMAP_USE_NAME_VALUE_MAP) {
+ // Separate the possible option values by "|".
+ const name_value_map *m = optmap[j].u.map;
+ for (size_t k = 0; m[k].name[0] != '\0'; ++k) {
+ if (k > 0)
+ str_append_str(&dest, "|");
+
+ str_append_str(&dest, m[k].name);
+ }
+ } else {
+ // Integer range is shown as min-max.
+ const bool use_byte_suffix = optmap[j].flags
+ & OPTMAP_USE_BYTE_SUFFIX;
+ str_append_u32(&dest, optmap[j].u.range.min,
+ use_byte_suffix);
+ str_append_str(&dest, "-");
+ str_append_u32(&dest, optmap[j].u.range.max,
+ use_byte_suffix);
+ }
+
+ str_append_str(&dest, ">");
+ }
+ }
+
+ // If no filters were added to the string then it must be because
+ // the caller provided an unsupported Filter ID.
+ if (!first_filter_printed) {
+ str_free(&dest, allocator);
+ return LZMA_OPTIONS_ERROR;
+ }
+
+ return str_finish(output_str, &dest, allocator);
+}
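And a sketch of listing the available filters with their option ranges in the "name:opt=<min-max>" help format built above:

#include <lzma.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char *list = NULL;

	/* LZMA_VLI_UNKNOWN lists every supported filter; passing a
	 * specific Filter ID would print just that one. */
	if (lzma_str_list_filters(&list, LZMA_VLI_UNKNOWN,
			LZMA_STR_ENCODER, NULL) != LZMA_OK)
		return 1;

	puts(list);
	free(list);	/* assuming the default malloc-based allocator */
	return 0;
}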
diff --git a/contrib/libs/lzma/liblzma/lz/lz_decoder.c b/contrib/libs/lzma/liblzma/lz/lz_decoder.c
index 09b574388f..06c95c1137 100644
--- a/contrib/libs/lzma/liblzma/lz/lz_decoder.c
+++ b/contrib/libs/lzma/liblzma/lz/lz_decoder.c
@@ -212,7 +212,8 @@ extern lzma_ret
lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
const lzma_filter_info *filters,
lzma_ret (*lz_init)(lzma_lz_decoder *lz,
- const lzma_allocator *allocator, const void *options,
+ const lzma_allocator *allocator,
+ lzma_vli id, const void *options,
lzma_lz_options *lz_options))
{
// Allocate the base structure if it isn't already allocated.
@@ -236,7 +237,7 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
// us the dictionary size.
lzma_lz_options lz_options;
return_if_error(lz_init(&coder->lz, allocator,
- filters[0].options, &lz_options));
+ filters[0].id, filters[0].options, &lz_options));
// If the dictionary size is very small, increase it to 4096 bytes.
// This is to prevent constant wrapping of the dictionary, which
@@ -301,11 +302,3 @@ lzma_lz_decoder_memusage(size_t dictionary_size)
{
return sizeof(lzma_coder) + (uint64_t)(dictionary_size);
}
-
-
-extern void
-lzma_lz_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
-{
- lzma_coder *coder = coder_ptr;
- coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size);
-}
diff --git a/contrib/libs/lzma/liblzma/lz/lz_decoder.h b/contrib/libs/lzma/liblzma/lz/lz_decoder.h
index 754ccf37c6..ad80d4dd0d 100644
--- a/contrib/libs/lzma/liblzma/lz/lz_decoder.h
+++ b/contrib/libs/lzma/liblzma/lz/lz_decoder.h
@@ -62,8 +62,10 @@ typedef struct {
void (*reset)(void *coder, const void *options);
- /// Set the uncompressed size
- void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size);
+ /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
+ /// then allow_eopm will always be true.
+ void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
+ bool allow_eopm);
/// Free allocated resources
void (*end)(void *coder, const lzma_allocator *allocator);
@@ -85,14 +87,12 @@ extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
const lzma_filter_info *filters,
lzma_ret (*lz_init)(lzma_lz_decoder *lz,
- const lzma_allocator *allocator, const void *options,
+ const lzma_allocator *allocator,
+ lzma_vli id, const void *options,
lzma_lz_options *lz_options));
extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
-extern void lzma_lz_decoder_uncompressed(
- void *coder, lzma_vli uncompressed_size);
-
//////////////////////
// Inline functions //
diff --git a/contrib/libs/lzma/liblzma/lz/lz_encoder.c b/contrib/libs/lzma/liblzma/lz/lz_encoder.c
index ad7a303acb..63d4aa057f 100644
--- a/contrib/libs/lzma/liblzma/lz/lz_encoder.c
+++ b/contrib/libs/lzma/liblzma/lz/lz_encoder.c
@@ -293,11 +293,15 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
return true;
}
- // Calculate the sizes of mf->hash and mf->son and check that
- // nice_len is big enough for the selected match finder.
- const uint32_t hash_bytes = lz_options->match_finder & 0x0F;
- if (hash_bytes > mf->nice_len)
- return true;
+ // Calculate the sizes of mf->hash and mf->son.
+ //
+ // NOTE: Since 5.3.5beta the LZMA encoder ensures that nice_len
+ // is big enough for the selected match finder. This makes it
+ // easier for applications as nice_len = 2 will always be accepted
+ // even though the effective value can be slightly bigger.
+ const uint32_t hash_bytes
+ = mf_get_hash_bytes(lz_options->match_finder);
+ assert(hash_bytes <= mf->nice_len);
const bool is_bt = (lz_options->match_finder & 0x10) != 0;
uint32_t hs;
@@ -521,14 +525,30 @@ lz_encoder_update(void *coder_ptr, const lzma_allocator *allocator,
}
+static lzma_ret
+lz_encoder_set_out_limit(void *coder_ptr, uint64_t *uncomp_size,
+ uint64_t out_limit)
+{
+ lzma_coder *coder = coder_ptr;
+
+ // This is supported only if there are no other filters chained.
+ if (coder->next.code == NULL && coder->lz.set_out_limit != NULL)
+ return coder->lz.set_out_limit(
+ coder->lz.coder, uncomp_size, out_limit);
+
+ return LZMA_OPTIONS_ERROR;
+}
+
+
extern lzma_ret
lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
const lzma_filter_info *filters,
lzma_ret (*lz_init)(lzma_lz_encoder *lz,
- const lzma_allocator *allocator, const void *options,
+ const lzma_allocator *allocator,
+ lzma_vli id, const void *options,
lzma_lz_options *lz_options))
{
-#ifdef HAVE_SMALL
+#if defined(HAVE_SMALL) && !defined(HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR)
// We need that the CRC32 table has been initialized.
lzma_crc32_init();
#endif
@@ -544,6 +564,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
next->code = &lz_encode;
next->end = &lz_encoder_end;
next->update = &lz_encoder_update;
+ next->set_out_limit = &lz_encoder_set_out_limit;
coder->lz.coder = NULL;
coder->lz.code = NULL;
@@ -565,7 +586,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
// Initialize the LZ-based encoder.
lzma_lz_options lz_options;
return_if_error(lz_init(&coder->lz, allocator,
- filters[0].options, &lz_options));
+ filters[0].id, filters[0].options, &lz_options));
// Setup the size information into coder->mf and deallocate
// old buffers if they have wrong size.
@@ -585,32 +606,28 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
extern LZMA_API(lzma_bool)
lzma_mf_is_supported(lzma_match_finder mf)
{
- bool ret = false;
-
+ switch (mf) {
#ifdef HAVE_MF_HC3
- if (mf == LZMA_MF_HC3)
- ret = true;
+ case LZMA_MF_HC3:
+ return true;
#endif
-
#ifdef HAVE_MF_HC4
- if (mf == LZMA_MF_HC4)
- ret = true;
+ case LZMA_MF_HC4:
+ return true;
#endif
-
#ifdef HAVE_MF_BT2
- if (mf == LZMA_MF_BT2)
- ret = true;
+ case LZMA_MF_BT2:
+ return true;
#endif
-
#ifdef HAVE_MF_BT3
- if (mf == LZMA_MF_BT3)
- ret = true;
+ case LZMA_MF_BT3:
+ return true;
#endif
-
#ifdef HAVE_MF_BT4
- if (mf == LZMA_MF_BT4)
- ret = true;
+ case LZMA_MF_BT4:
+ return true;
#endif
-
- return ret;
+ default:
+ return false;
+ }
}
diff --git a/contrib/libs/lzma/liblzma/lz/lz_encoder.h b/contrib/libs/lzma/liblzma/lz/lz_encoder.h
index 426dcd8a38..7950a2f4ef 100644
--- a/contrib/libs/lzma/liblzma/lz/lz_encoder.h
+++ b/contrib/libs/lzma/liblzma/lz/lz_encoder.h
@@ -204,6 +204,10 @@ typedef struct {
/// Update the options in the middle of the encoding.
lzma_ret (*options_update)(void *coder, const lzma_filter *filter);
+ /// Set maximum allowed output size
+ lzma_ret (*set_out_limit)(void *coder, uint64_t *uncomp_size,
+ uint64_t out_limit);
+
} lzma_lz_encoder;
@@ -216,6 +220,15 @@ typedef struct {
// are called `read ahead'.
+/// Get how many bytes the match finder hashes in its initial step.
+/// This is also the minimum nice_len value with the match finder.
+static inline uint32_t
+mf_get_hash_bytes(lzma_match_finder match_finder)
+{
+ return (uint32_t)match_finder & 0x0F;
+}
+
+
/// Get pointer to the first byte not ran through the match finder
static inline const uint8_t *
mf_ptr(const lzma_mf *mf)
@@ -298,7 +311,8 @@ extern lzma_ret lzma_lz_encoder_init(
lzma_next_coder *next, const lzma_allocator *allocator,
const lzma_filter_info *filters,
lzma_ret (*lz_init)(lzma_lz_encoder *lz,
- const lzma_allocator *allocator, const void *options,
+ const lzma_allocator *allocator,
+ lzma_vli id, const void *options,
lzma_lz_options *lz_options));
diff --git a/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.c b/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.c
index cf1b5110ac..567df490ca 100644
--- a/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.c
+++ b/contrib/libs/lzma/liblzma/lzma/lzma2_decoder.c
@@ -139,7 +139,7 @@ lzma2_decode(void *coder_ptr, lzma_dict *restrict dict,
coder->uncompressed_size += in[(*in_pos)++] + 1U;
coder->sequence = SEQ_COMPRESSED_0;
coder->lzma.set_uncompressed(coder->lzma.coder,
- coder->uncompressed_size);
+ coder->uncompressed_size, false);
break;
case SEQ_COMPRESSED_0:
@@ -226,7 +226,8 @@ lzma2_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
static lzma_ret
lzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
- const void *opt, lzma_lz_options *lz_options)
+ lzma_vli id lzma_attribute((__unused__)), const void *opt,
+ lzma_lz_options *lz_options)
{
lzma_lzma2_coder *coder = lz->coder;
if (coder == NULL) {
diff --git a/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.c b/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.c
index 63588ee30c..4b6b23118d 100644
--- a/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.c
+++ b/contrib/libs/lzma/liblzma/lzma/lzma2_encoder.c
@@ -310,7 +310,8 @@ lzma2_encoder_options_update(void *coder_ptr, const lzma_filter *filter)
static lzma_ret
lzma2_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
- const void *options, lzma_lz_options *lz_options)
+ lzma_vli id lzma_attribute((__unused__)), const void *options,
+ lzma_lz_options *lz_options)
{
if (options == NULL)
return LZMA_PROG_ERROR;
@@ -340,7 +341,7 @@ lzma2_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
// Initialize LZMA encoder
return_if_error(lzma_lzma_encoder_create(&coder->lzma, allocator,
- &coder->opt_cur, lz_options));
+ LZMA_FILTER_LZMA2, &coder->opt_cur, lz_options));
// Make sure that we will always have enough history available in
// case we need to use uncompressed chunks. They are used when the
@@ -378,6 +379,9 @@ lzma_lzma2_encoder_memusage(const void *options)
extern lzma_ret
lzma_lzma2_props_encode(const void *options, uint8_t *out)
{
+ if (options == NULL)
+ return LZMA_PROG_ERROR;
+
const lzma_options_lzma *const opt = options;
uint32_t d = my_max(opt->dict_size, LZMA_DICT_SIZE_MIN);
diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_decoder.c b/contrib/libs/lzma/liblzma/lzma/lzma_decoder.c
index e605a0a916..26c148a95e 100644
--- a/contrib/libs/lzma/liblzma/lzma/lzma_decoder.c
+++ b/contrib/libs/lzma/liblzma/lzma/lzma_decoder.c
@@ -238,6 +238,11 @@ typedef struct {
/// payload marker is expected.
lzma_vli uncompressed_size;
+ /// True if end of payload marker (EOPM) is allowed even when
+ /// uncompressed_size is known; false if EOPM must not be present.
+ /// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN.
+ bool allow_eopm;
+
////////////////////////////////
// State of incomplete symbol //
////////////////////////////////
@@ -343,12 +348,24 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
lzma_ret ret = LZMA_OK;
- // If uncompressed size is known, there must be no end of payload
- // marker.
- const bool no_eopm = coder->uncompressed_size
- != LZMA_VLI_UNKNOWN;
- if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
+ // This is true when the next LZMA symbol is allowed to be EOPM.
+ // That is, if this is false, then EOPM is considered
+ // an invalid symbol and we will return LZMA_DATA_ERROR.
+ //
+ // EOPM is always required (not just allowed) when
+ // the uncompressed size isn't known. When uncompressed size
+ // is known, eopm_is_valid may be set to true later.
+ bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN;
+
+ // If uncompressed size is known and there is enough output space
+ // to decode all the data, limit the available buffer space so that
+ // the main loop won't try to decode past the end of the stream.
+ bool might_finish_without_eopm = false;
+ if (coder->uncompressed_size != LZMA_VLI_UNKNOWN
+ && coder->uncompressed_size <= dict.limit - dict.pos) {
dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
+ might_finish_without_eopm = true;
+ }
// The main decoder loop. The "switch" is used to restart the decoder at
// correct location. Once restarted, the "switch" is no longer used.
@@ -361,8 +378,32 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
case SEQ_NORMALIZE:
case SEQ_IS_MATCH:
- if (unlikely(no_eopm && dict.pos == dict.limit))
- break;
+ if (unlikely(might_finish_without_eopm
+ && dict.pos == dict.limit)) {
+ // In rare cases there is a useless byte that needs
+ // to be read anyway.
+ rc_normalize(SEQ_NORMALIZE);
+
+ // If the range decoder state is such that we can
+ // be at the end of the LZMA stream, then the
+ // decoding is finished.
+ if (rc_is_finished(rc)) {
+ ret = LZMA_STREAM_END;
+ goto out;
+ }
+
+ // If the caller hasn't allowed EOPM to be present
+ // together with known uncompressed size, then the
+ // LZMA stream is corrupt.
+ if (!coder->allow_eopm) {
+ ret = LZMA_DATA_ERROR;
+ goto out;
+ }
+
+ // Otherwise continue decoding with the expectation
+ // that the next LZMA symbol is EOPM.
+ eopm_is_valid = true;
+ }
rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
rc_update_0(coder->is_match[state][pos_state]);
@@ -658,11 +699,18 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
if (rep0 == UINT32_MAX) {
// End of payload marker was
- // found. It must not be
- // present if uncompressed
- // size is known.
- if (coder->uncompressed_size
- != LZMA_VLI_UNKNOWN) {
+ // found. It may only be
+ // present if
+ // - uncompressed size is
+ // unknown or
+ // - after known uncompressed
+ // size amount of bytes has
+ // been decompressed and
+ // caller has indicated
+ // that EOPM might be used
+ // (it's not allowed in
+ // LZMA2).
+ if (!eopm_is_valid) {
ret = LZMA_DATA_ERROR;
goto out;
}
@@ -671,7 +719,9 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
// LZMA1 stream with
// end-of-payload marker.
rc_normalize(SEQ_EOPM);
- ret = LZMA_STREAM_END;
+ ret = rc_is_finished(rc)
+ ? LZMA_STREAM_END
+ : LZMA_DATA_ERROR;
goto out;
}
}
@@ -793,9 +843,6 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
}
}
- rc_normalize(SEQ_NORMALIZE);
- coder->sequence = SEQ_IS_MATCH;
-
out:
// Save state
@@ -822,24 +869,21 @@ out:
if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
coder->uncompressed_size -= dict.pos - dict_start;
- // Since there cannot be end of payload marker if the
- // uncompressed size was known, we check here if we
- // finished decoding.
+ // If we have gotten all the output but the decoder wants
+ // to write more output, the file is corrupt. There are
+ // three SEQ values where output is produced.
if (coder->uncompressed_size == 0 && ret == LZMA_OK
- && coder->sequence != SEQ_NORMALIZE)
- ret = coder->sequence == SEQ_IS_MATCH
- ? LZMA_STREAM_END : LZMA_DATA_ERROR;
+ && (coder->sequence == SEQ_LITERAL_WRITE
+ || coder->sequence == SEQ_SHORTREP
+ || coder->sequence == SEQ_COPY))
+ ret = LZMA_DATA_ERROR;
}
- // We can do an additional check in the range decoder to catch some
- // corrupted files.
if (ret == LZMA_STREAM_END) {
- if (!rc_is_finished(coder->rc))
- ret = LZMA_DATA_ERROR;
-
// Reset the range decoder so that it is ready to reinitialize
// for a new LZMA2 chunk.
rc_reset(coder->rc);
+ coder->sequence = SEQ_IS_MATCH;
}
return ret;
@@ -848,10 +892,12 @@ out:
static void
-lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
+lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
+ bool allow_eopm)
{
lzma_lzma1_decoder *coder = coder_ptr;
coder->uncompressed_size = uncompressed_size;
+ coder->allow_eopm = allow_eopm;
}
@@ -940,7 +986,7 @@ lzma_decoder_reset(void *coder_ptr, const void *opt)
extern lzma_ret
lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
- const void *opt, lzma_lz_options *lz_options)
+ const lzma_options_lzma *options, lzma_lz_options *lz_options)
{
if (lz->coder == NULL) {
lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator);
@@ -954,7 +1000,6 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
// All dictionary sizes are OK here. LZ decoder will take care of
// the special cases.
- const lzma_options_lzma *options = opt;
lz_options->dict_size = options->dict_size;
lz_options->preset_dict = options->preset_dict;
lz_options->preset_dict_size = options->preset_dict_size;
@@ -968,16 +1013,40 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
/// the LZ initialization).
static lzma_ret
lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
- const void *options, lzma_lz_options *lz_options)
+ lzma_vli id, const void *options, lzma_lz_options *lz_options)
{
if (!is_lclppb_valid(options))
return LZMA_PROG_ERROR;
+ lzma_vli uncomp_size = LZMA_VLI_UNKNOWN;
+ bool allow_eopm = true;
+
+ if (id == LZMA_FILTER_LZMA1EXT) {
+ const lzma_options_lzma *opt = options;
+
+ // Only one flag is supported.
+ if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
+ return LZMA_OPTIONS_ERROR;
+
+ // FIXME? Using lzma_vli instead of uint64_t is weird because
+ // this has nothing to do with .xz headers and variable-length
+ // integer encoding. On the other hand, using LZMA_VLI_UNKNOWN
+ // instead of UINT64_MAX is clearer when unknown size is
+ // meant. A problem with using lzma_vli is that now we
+ // allow > LZMA_VLI_MAX which is fine in this file but
+ // it's still confusing. Note that alone_decoder.c also
+ // allows > LZMA_VLI_MAX when setting uncompressed size.
+ uncomp_size = opt->ext_size_low
+ + ((uint64_t)(opt->ext_size_high) << 32);
+ allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0
+ || uncomp_size == LZMA_VLI_UNKNOWN;
+ }
+
return_if_error(lzma_lzma_decoder_create(
lz, allocator, options, lz_options));
lzma_decoder_reset(lz->coder, options);
- lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
+ lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm);
return LZMA_OK;
}
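The ext_size_low/ext_size_high and ext_flags fields decoded above belong to the new LZMA_FILTER_LZMA1EXT id. A hedged sketch of how a raw decoder might be set up with a known uncompressed size and no end-of-payload marker (the preset and size handling here are placeholders, not code from this patch):

#include <lzma.h>

static lzma_ret
init_raw_lzma1ext(lzma_stream *strm, uint64_t uncomp_size)
{
	lzma_options_lzma opt;
	if (lzma_lzma_preset(&opt, LZMA_PRESET_DEFAULT))
		return LZMA_OPTIONS_ERROR;

	/* No LZMA_LZMA1EXT_ALLOW_EOPM: the stream must end exactly
	 * after uncomp_size bytes of output. */
	opt.ext_flags = 0;
	opt.ext_size_low = (uint32_t)uncomp_size;
	opt.ext_size_high = (uint32_t)(uncomp_size >> 32);

	lzma_filter filters[] = {
		{ .id = LZMA_FILTER_LZMA1EXT, .options = &opt },
		{ .id = LZMA_VLI_UNKNOWN, .options = NULL },
	};

	return lzma_raw_decoder(strm, filters);
}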
diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_decoder.h b/contrib/libs/lzma/liblzma/lzma/lzma_decoder.h
index fa8ecb23e4..1427bc2461 100644
--- a/contrib/libs/lzma/liblzma/lzma/lzma_decoder.h
+++ b/contrib/libs/lzma/liblzma/lzma/lzma_decoder.h
@@ -42,7 +42,7 @@ extern bool lzma_lzma_lclppb_decode(
/// LZMA2 decoders.
extern lzma_ret lzma_lzma_decoder_create(
lzma_lz_decoder *lz, const lzma_allocator *allocator,
- const void *opt, lzma_lz_options *lz_options);
+ const lzma_options_lzma *opt, lzma_lz_options *lz_options);
/// Gets memory usage without validating lc/lp/pb. This is used by LZMA2
/// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb.
diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_encoder.c b/contrib/libs/lzma/liblzma/lzma/lzma_encoder.c
index 07d2b87bc6..dc62f44f1b 100644
--- a/contrib/libs/lzma/liblzma/lzma/lzma_encoder.c
+++ b/contrib/libs/lzma/liblzma/lzma/lzma_encoder.c
@@ -268,6 +268,7 @@ static bool
encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf)
{
assert(mf_position(mf) == 0);
+ assert(coder->uncomp_size == 0);
if (mf->read_pos == mf->read_limit) {
if (mf->action == LZMA_RUN)
@@ -283,6 +284,7 @@ encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf)
mf->read_ahead = 0;
rc_bit(&coder->rc, &coder->is_match[0][0], 0);
rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]);
+ ++coder->uncomp_size;
}
// Initialization is done (except if empty file).
@@ -317,21 +319,28 @@ lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
if (!coder->is_initialized && !encode_init(coder, mf))
return LZMA_OK;
- // Get the lowest bits of the uncompressed offset from the LZ layer.
- uint32_t position = mf_position(mf);
+ // Encode pending output bytes from the range encoder.
+ // At the start of the stream, encode_init() encodes one literal.
+ // Later there can be pending output only with LZMA1 because LZMA2
+ // ensures that there is always enough output space. Thus when using
+ // LZMA2, rc_encode() calls in this function will always return false.
+ if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+ // We don't get here with LZMA2.
+ assert(limit == UINT32_MAX);
+ return LZMA_OK;
+ }
- while (true) {
- // Encode pending bits, if any. Calling this before encoding
- // the next symbol is needed only with plain LZMA, since
- // LZMA2 always provides big enough buffer to flush
- // everything out from the range encoder. For the same reason,
- // rc_encode() never returns true when this function is used
- // as part of LZMA2 encoder.
- if (rc_encode(&coder->rc, out, out_pos, out_size)) {
- assert(limit == UINT32_MAX);
- return LZMA_OK;
- }
+ // If the range encoder was flushed in an earlier call to this
+ // function but there wasn't enough output buffer space, those
+ // bytes would have now been encoded by the above rc_encode() call
+ // and the stream has now been finished. This can only happen with
+ // LZMA1 as LZMA2 always provides enough output buffer space.
+ if (coder->is_flushed) {
+ assert(limit == UINT32_MAX);
+ return LZMA_STREAM_END;
+ }
+ while (true) {
// With LZMA2 we need to take care that compressed size of
// a chunk doesn't get too big.
// FIXME? Check if this could be improved.
@@ -365,37 +374,64 @@ lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
if (coder->fast_mode)
lzma_lzma_optimum_fast(coder, mf, &back, &len);
else
- lzma_lzma_optimum_normal(
- coder, mf, &back, &len, position);
-
- encode_symbol(coder, mf, back, len, position);
+ lzma_lzma_optimum_normal(coder, mf, &back, &len,
+ (uint32_t)(coder->uncomp_size));
+
+ encode_symbol(coder, mf, back, len,
+ (uint32_t)(coder->uncomp_size));
+
+ // If output size limiting is active (out_limit != 0), check
+ // if encoding this LZMA symbol would make the output size
+ // exceed the specified limit.
+ if (coder->out_limit != 0 && rc_encode_dummy(
+ &coder->rc, coder->out_limit)) {
+ // The most recent LZMA symbol would make the output
+ // too big. Throw it away.
+ rc_forget(&coder->rc);
+
+ // FIXME: Tell the LZ layer to not read more input as
+ // it would be a waste of time. This doesn't matter if
+ // output-size-limited encoding is done with a single
+ // call though.
- position += len;
- }
-
- if (!coder->is_flushed) {
- coder->is_flushed = true;
-
- // We don't support encoding plain LZMA streams without EOPM,
- // and LZMA2 doesn't use EOPM at LZMA level.
- if (limit == UINT32_MAX)
- encode_eopm(coder, position);
+ break;
+ }
- // Flush the remaining bytes from the range encoder.
- rc_flush(&coder->rc);
+ // This symbol will be encoded so update the uncompressed size.
+ coder->uncomp_size += len;
- // Copy the remaining bytes to the output buffer. If there
- // isn't enough output space, we will copy out the remaining
- // bytes on the next call to this function by using
- // the rc_encode() call in the encoding loop above.
+ // Encode the LZMA symbol.
if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+ // Once again, this can only happen with LZMA1.
assert(limit == UINT32_MAX);
return LZMA_OK;
}
}
- // Make it ready for the next LZMA2 chunk.
- coder->is_flushed = false;
+ // Make the uncompressed size available to the application.
+ if (coder->uncomp_size_ptr != NULL)
+ *coder->uncomp_size_ptr = coder->uncomp_size;
+
+ // LZMA2 doesn't use EOPM at LZMA level.
+ //
+ // Plain LZMA streams without EOPM aren't supported except when
+ // output size limiting is enabled.
+ if (coder->use_eopm)
+ encode_eopm(coder, (uint32_t)(coder->uncomp_size));
+
+ // Flush the remaining bytes from the range encoder.
+ rc_flush(&coder->rc);
+
+ // Copy the remaining bytes to the output buffer. If there
+ // isn't enough output space, we will copy out the remaining
+ // bytes on the next call to this function.
+ if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+ // This cannot happen with LZMA2.
+ assert(limit == UINT32_MAX);
+
+ coder->is_flushed = true;
+ return LZMA_OK;
+ }
return LZMA_STREAM_END;
}
@@ -414,6 +450,23 @@ lzma_encode(void *coder, lzma_mf *restrict mf,
}
+static lzma_ret
+lzma_lzma_set_out_limit(
+ void *coder_ptr, uint64_t *uncomp_size, uint64_t out_limit)
+{
+ // The minimum output size is 5 bytes, but that cannot hold
+ // any actual data, so require at least 6 bytes.
+ if (out_limit < 6)
+ return LZMA_BUF_ERROR;
+
+ lzma_lzma1_encoder *coder = coder_ptr;
+ coder->out_limit = out_limit;
+ coder->uncomp_size_ptr = uncomp_size;
+ coder->use_eopm = false;
+ return LZMA_OK;
+}
+
+
////////////////////
// Initialization //
////////////////////
@@ -440,7 +493,8 @@ set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options)
lz_options->dict_size = options->dict_size;
lz_options->after_size = LOOP_INPUT_MAX;
lz_options->match_len_max = MATCH_LEN_MAX;
- lz_options->nice_len = options->nice_len;
+ lz_options->nice_len = my_max(mf_get_hash_bytes(options->mf),
+ options->nice_len);
lz_options->match_finder = options->mf;
lz_options->depth = options->depth;
lz_options->preset_dict = options->preset_dict;
@@ -546,10 +600,13 @@ lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder,
extern lzma_ret
-lzma_lzma_encoder_create(void **coder_ptr,
- const lzma_allocator *allocator,
- const lzma_options_lzma *options, lzma_lz_options *lz_options)
+lzma_lzma_encoder_create(void **coder_ptr, const lzma_allocator *allocator,
+ lzma_vli id, const lzma_options_lzma *options,
+ lzma_lz_options *lz_options)
{
+ assert(id == LZMA_FILTER_LZMA1 || id == LZMA_FILTER_LZMA1EXT
+ || id == LZMA_FILTER_LZMA2);
+
// Allocate lzma_lzma1_encoder if it wasn't already allocated.
if (*coder_ptr == NULL) {
*coder_ptr = lzma_alloc(sizeof(lzma_lzma1_encoder), allocator);
@@ -559,10 +616,9 @@ lzma_lzma_encoder_create(void **coder_ptr,
lzma_lzma1_encoder *coder = *coder_ptr;
- // Set compression mode. We haven't validates the options yet,
- // but it's OK here, since nothing bad happens with invalid
- // options in the code below, and they will get rejected by
- // lzma_lzma_encoder_reset() call at the end of this function.
+ // Set compression mode. Note that we haven't validated the options
+ // yet. Invalid options will get rejected by lzma_lzma_encoder_reset()
+ // call at the end of this function.
switch (options->mode) {
case LZMA_MODE_FAST:
coder->fast_mode = true;
@@ -573,6 +629,18 @@ lzma_lzma_encoder_create(void **coder_ptr,
// Set dist_table_size.
// Round the dictionary size up to next 2^n.
+ //
+ // Currently the maximum encoder dictionary size
+ // is 1.5 GiB due to lz_encoder.c and here we need
+ // to be below 2 GiB to make the rounded-up value
+ // fit in a uint32_t and avoid an infinite while-loop
+ // (and undefined behavior due to a too large shift).
+ // So do the same check as in LZ encoder,
+ // limiting to 1.5 GiB.
+ if (options->dict_size > (UINT32_C(1) << 30)
+ + (UINT32_C(1) << 29))
+ return LZMA_OPTIONS_ERROR;
+
uint32_t log_size = 0;
while ((UINT32_C(1) << log_size) < options->dict_size)
++log_size;
@@ -580,10 +648,14 @@ lzma_lzma_encoder_create(void **coder_ptr,
coder->dist_table_size = log_size * 2;
// Length encoders' price table size
+ const uint32_t nice_len = my_max(
+ mf_get_hash_bytes(options->mf),
+ options->nice_len);
+
coder->match_len_encoder.table_size
- = options->nice_len + 1 - MATCH_LEN_MIN;
+ = nice_len + 1 - MATCH_LEN_MIN;
coder->rep_len_encoder.table_size
- = options->nice_len + 1 - MATCH_LEN_MIN;
+ = nice_len + 1 - MATCH_LEN_MIN;
break;
}
@@ -598,6 +670,37 @@ lzma_lzma_encoder_create(void **coder_ptr,
coder->is_initialized = options->preset_dict != NULL
&& options->preset_dict_size > 0;
coder->is_flushed = false;
+ coder->uncomp_size = 0;
+ coder->uncomp_size_ptr = NULL;
+
+ // Output size limiting is disabled by default.
+ coder->out_limit = 0;
+
+ // Determine if end marker is wanted:
+ // - It is never used with LZMA2.
+ // - It is always used with LZMA_FILTER_LZMA1 (unless
+ // lzma_lzma_set_out_limit() is called later).
+ // - LZMA_FILTER_LZMA1EXT has a flag for it in the options.
+ coder->use_eopm = (id == LZMA_FILTER_LZMA1);
+ if (id == LZMA_FILTER_LZMA1EXT) {
+ // Check if unsupported flags are present.
+ if (options->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
+ return LZMA_OPTIONS_ERROR;
+
+ coder->use_eopm = (options->ext_flags
+ & LZMA_LZMA1EXT_ALLOW_EOPM) != 0;
+
+ // TODO? As long as there are no filters that change the size
+ // of the data, it is enough to look at lzma_stream.total_in
+ // after encoding has been finished to know the uncompressed
+ // size of the LZMA1 stream. But in the future there could be
+ // filters that change the size of the data and then total_in
+ // doesn't work as the LZMA1 stream size might be different
+ // due to another filter in the chain. The problem is simple
+ // to solve: Add another flag to ext_flags and then set
+ // coder->uncomp_size_ptr to the address stored in
+ // lzma_options_lzma.reserved_ptr2 (or _ptr1).
+ }
set_lz_options(lz_options, options);
@@ -607,11 +710,12 @@ lzma_lzma_encoder_create(void **coder_ptr,
static lzma_ret
lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
- const void *options, lzma_lz_options *lz_options)
+ lzma_vli id, const void *options, lzma_lz_options *lz_options)
{
lz->code = &lzma_encode;
+ lz->set_out_limit = &lzma_lzma_set_out_limit;
return lzma_lzma_encoder_create(
- &lz->coder, allocator, options, lz_options);
+ &lz->coder, allocator, id, options, lz_options);
}
@@ -658,6 +762,9 @@ lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte)
extern lzma_ret
lzma_lzma_props_encode(const void *options, uint8_t *out)
{
+ if (options == NULL)
+ return LZMA_PROG_ERROR;
+
const lzma_options_lzma *const opt = options;
if (lzma_lzma_lclppb_encode(opt, out))
diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_encoder.h b/contrib/libs/lzma/liblzma/lzma/lzma_encoder.h
index 6cfdf228bf..84d8c9163f 100644
--- a/contrib/libs/lzma/liblzma/lzma/lzma_encoder.h
+++ b/contrib/libs/lzma/liblzma/lzma/lzma_encoder.h
@@ -40,7 +40,8 @@ extern bool lzma_lzma_lclppb_encode(
/// Initializes raw LZMA encoder; this is used by LZMA2.
extern lzma_ret lzma_lzma_encoder_create(
void **coder_ptr, const lzma_allocator *allocator,
- const lzma_options_lzma *options, lzma_lz_options *lz_options);
+ lzma_vli id, const lzma_options_lzma *options,
+ lzma_lz_options *lz_options);
/// Resets an already initialized LZMA encoder; this is used by LZMA2.
diff --git a/contrib/libs/lzma/liblzma/lzma/lzma_encoder_private.h b/contrib/libs/lzma/liblzma/lzma/lzma_encoder_private.h
index 2e34aace16..b228c57761 100644
--- a/contrib/libs/lzma/liblzma/lzma/lzma_encoder_private.h
+++ b/contrib/libs/lzma/liblzma/lzma/lzma_encoder_private.h
@@ -72,6 +72,18 @@ struct lzma_lzma1_encoder_s {
/// Range encoder
lzma_range_encoder rc;
+ /// Uncompressed size (doesn't include possible preset dictionary)
+ uint64_t uncomp_size;
+
+ /// If non-zero, produce at most this much output.
+ /// Some input may then be missing from the output.
+ uint64_t out_limit;
+
+ /// If the above out_limit is non-zero, *uncomp_size_ptr is set to
+ /// the amount of uncompressed data that we were able to fit
+ /// in the output buffer.
+ uint64_t *uncomp_size_ptr;
+
/// State
lzma_lzma_state state;
@@ -99,6 +111,9 @@ struct lzma_lzma1_encoder_s {
/// have been written to the output buffer yet.
bool is_flushed;
+ /// True if end of payload marker will be written.
+ bool use_eopm;
+
uint32_t pos_mask; ///< (1 << pos_bits) - 1
uint32_t literal_context_bits;
uint32_t literal_pos_mask;
diff --git a/contrib/libs/lzma/liblzma/rangecoder/range_encoder.h b/contrib/libs/lzma/liblzma/rangecoder/range_encoder.h
index 1e1c36995b..d794eabbcc 100644
--- a/contrib/libs/lzma/liblzma/rangecoder/range_encoder.h
+++ b/contrib/libs/lzma/liblzma/rangecoder/range_encoder.h
@@ -19,9 +19,9 @@
/// Maximum number of symbols that can be put pending into lzma_range_encoder
-/// structure between calls to lzma_rc_encode(). For LZMA, 52+5 is enough
+/// structure between calls to lzma_rc_encode(). For LZMA, 48+5 is enough
/// (match with big distance and length followed by range encoder flush).
-#define RC_SYMBOLS_MAX 58
+#define RC_SYMBOLS_MAX 53
typedef struct {
@@ -30,6 +30,9 @@ typedef struct {
uint32_t range;
uint8_t cache;
+ /// Number of bytes written out by rc_encode() -> rc_shift_low()
+ uint64_t out_total;
+
/// Number of symbols in the tables
size_t count;
@@ -58,12 +61,22 @@ rc_reset(lzma_range_encoder *rc)
rc->cache_size = 1;
rc->range = UINT32_MAX;
rc->cache = 0;
+ rc->out_total = 0;
rc->count = 0;
rc->pos = 0;
}
static inline void
+rc_forget(lzma_range_encoder *rc)
+{
+ // This must not be called when rc_encode() is partially done.
+ assert(rc->pos == 0);
+ rc->count = 0;
+}
+
+
+static inline void
rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
{
rc->symbols[rc->count] = bit;
@@ -132,6 +145,7 @@ rc_shift_low(lzma_range_encoder *rc,
out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
++*out_pos;
+ ++rc->out_total;
rc->cache = 0xFF;
} while (--rc->cache_size != 0);
@@ -146,6 +160,34 @@ rc_shift_low(lzma_range_encoder *rc,
}
+// NOTE: The last two arguments are uint64_t instead of size_t because in
+// the dummy version these refer to the size of the whole range-encoded
+// output stream, not just to the currently available output buffer space.
+static inline bool
+rc_shift_low_dummy(uint64_t *low, uint64_t *cache_size, uint8_t *cache,
+ uint64_t *out_pos, uint64_t out_size)
+{
+ if ((uint32_t)(*low) < (uint32_t)(0xFF000000)
+ || (uint32_t)(*low >> 32) != 0) {
+ do {
+ if (*out_pos == out_size)
+ return true;
+
+ ++*out_pos;
+ *cache = 0xFF;
+
+ } while (--*cache_size != 0);
+
+ *cache = (*low >> 24) & 0xFF;
+ }
+
+ ++*cache_size;
+ *low = (*low & 0x00FFFFFF) << RC_SHIFT_BITS;
+
+ return false;
+}
+
+
static inline bool
rc_encode(lzma_range_encoder *rc,
uint8_t *out, size_t *out_pos, size_t out_size)
@@ -222,6 +264,83 @@ rc_encode(lzma_range_encoder *rc,
}
+static inline bool
+rc_encode_dummy(const lzma_range_encoder *rc, uint64_t out_limit)
+{
+ assert(rc->count <= RC_SYMBOLS_MAX);
+
+ uint64_t low = rc->low;
+ uint64_t cache_size = rc->cache_size;
+ uint32_t range = rc->range;
+ uint8_t cache = rc->cache;
+ uint64_t out_pos = rc->out_total;
+
+ size_t pos = rc->pos;
+
+ while (true) {
+ // Normalize
+ if (range < RC_TOP_VALUE) {
+ if (rc_shift_low_dummy(&low, &cache_size, &cache,
+ &out_pos, out_limit))
+ return true;
+
+ range <<= RC_SHIFT_BITS;
+ }
+
+ // This check is here because the normalization above must
+ // be done before flushing the last bytes.
+ if (pos == rc->count)
+ break;
+
+ // Encode a bit
+ switch (rc->symbols[pos]) {
+ case RC_BIT_0: {
+ probability prob = *rc->probs[pos];
+ range = (range >> RC_BIT_MODEL_TOTAL_BITS)
+ * prob;
+ break;
+ }
+
+ case RC_BIT_1: {
+ probability prob = *rc->probs[pos];
+ const uint32_t bound = prob * (range
+ >> RC_BIT_MODEL_TOTAL_BITS);
+ low += bound;
+ range -= bound;
+ break;
+ }
+
+ case RC_DIRECT_0:
+ range >>= 1;
+ break;
+
+ case RC_DIRECT_1:
+ range >>= 1;
+ low += range;
+ break;
+
+ case RC_FLUSH:
+ default:
+ assert(0);
+ break;
+ }
+
+ ++pos;
+ }
+
+ // Flush the last bytes. This isn't in rc->symbols[] so we do
+ // it after the above loop to take into account the size of
+ // the flushing that will be done at the end of the stream.
+ for (pos = 0; pos < 5; ++pos) {
+ if (rc_shift_low_dummy(&low, &cache_size,
+ &cache, &out_pos, out_limit))
+ return true;
+ }
+
+ return false;
+}
+
+
static inline uint64_t
rc_pending(const lzma_range_encoder *rc)
{
diff --git a/contrib/libs/lzma/liblzma/simple/arm.c b/contrib/libs/lzma/liblzma/simple/arm.c
index ff5073ae58..6e53970d2f 100644
--- a/contrib/libs/lzma/liblzma/simple/arm.c
+++ b/contrib/libs/lzma/liblzma/simple/arm.c
@@ -53,6 +53,7 @@ arm_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
}
+#ifdef HAVE_ENCODER_ARM
extern lzma_ret
lzma_simple_arm_encoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -60,8 +61,10 @@ lzma_simple_arm_encoder_init(lzma_next_coder *next,
{
return arm_coder_init(next, allocator, filters, true);
}
+#endif
+#ifdef HAVE_DECODER_ARM
extern lzma_ret
lzma_simple_arm_decoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -69,3 +72,4 @@ lzma_simple_arm_decoder_init(lzma_next_coder *next,
{
return arm_coder_init(next, allocator, filters, false);
}
+#endif
diff --git a/contrib/libs/lzma/liblzma/simple/arm64.c b/contrib/libs/lzma/liblzma/simple/arm64.c
new file mode 100644
index 0000000000..5e7f26562d
--- /dev/null
+++ b/contrib/libs/lzma/liblzma/simple/arm64.c
@@ -0,0 +1,136 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file arm64.c
+/// \brief Filter for ARM64 binaries
+///
+/// This converts ARM64 relative addresses in the BL and ADRP immediates
+/// to absolute values to increase redundancy of ARM64 code.
+///
+/// Converting B or ADR instructions was also tested but it's not useful.
+/// A majority of the jumps for the B instruction are very small (+/- 0xFF).
+/// These are typical for loops and if-statements. Encoding them to their
+/// absolute address reduces redundancy since many of the small relative
+/// jump values are repeated, but very few of the absolute addresses are.
+//
+// Authors: Lasse Collin
+// Jia Tan
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+
+static size_t
+arm64_code(void *simple lzma_attribute((__unused__)),
+ uint32_t now_pos, bool is_encoder,
+ uint8_t *buffer, size_t size)
+{
+ size_t i;
+
+ // Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower
+ // with auto-vectorization that is enabled by default with -O2.
+ // Such vectorization bloat happens with -O2 when targeting ARM64 too
+ // but performance hasn't been tested.
+#ifdef __clang__
+# pragma clang loop vectorize(disable)
+#endif
+ for (i = 0; i + 4 <= size; i += 4) {
+ uint32_t pc = (uint32_t)(now_pos + i);
+ uint32_t instr = read32le(buffer + i);
+
+ if ((instr >> 26) == 0x25) {
+ // BL instruction:
+ // The full 26-bit immediate is converted.
+ // The range is +/-128 MiB.
+ //
+ // Using the full range helps quite a lot with
+ // big executables. Smaller range would reduce false
+ // positives in non-code sections of the input though
+ // so this is a compromise that slightly favors big
+ // files. With the full range only six bits of the 32
+ // need to match to trigger a conversion.
+ const uint32_t src = instr;
+ instr = 0x94000000;
+
+ pc >>= 2;
+ if (!is_encoder)
+ pc = 0U - pc;
+
+ instr |= (src + pc) & 0x03FFFFFF;
+ write32le(buffer + i, instr);
+
+ } else if ((instr & 0x9F000000) == 0x90000000) {
+ // ADRP instruction:
+ // Only values in the range +/-512 MiB are converted.
+ //
+ // Using less than the full +/-4 GiB range reduces
+ // false positives on non-code sections of the input
+ // while being excellent for executables up to 512 MiB.
+ // The positive effect of ADRP conversion is smaller
+ // than that of BL but it also doesn't hurt so much in
+ // non-code sections of input because, with +/-512 MiB
+ // range, nine bits of 32 need to match to trigger a
+ // conversion (two 10-bit match choices = 9 bits).
+ const uint32_t src = ((instr >> 29) & 3)
+ | ((instr >> 3) & 0x001FFFFC);
+
+ // With the addition only one branch is needed to
+ // check the +/- range. This is usually false when
+ // processing ARM64 code so branch prediction will
+ // handle it well in terms of performance.
+ //
+ //if ((src & 0x001E0000) != 0
+ // && (src & 0x001E0000) != 0x001E0000)
+ if ((src + 0x00020000) & 0x001C0000)
+ continue;
+
+ instr &= 0x9000001F;
+
+ pc >>= 12;
+ if (!is_encoder)
+ pc = 0U - pc;
+
+ const uint32_t dest = src + pc;
+ instr |= (dest & 3) << 29;
+ instr |= (dest & 0x0003FFFC) << 3;
+ instr |= (0U - (dest & 0x00020000)) & 0x00E00000;
+ write32le(buffer + i, instr);
+ }
+ }
+
+ return i;
+}
+
+
+static lzma_ret
+arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters, bool is_encoder)
+{
+ return lzma_simple_coder_init(next, allocator, filters,
+ &arm64_code, 0, 4, 4, is_encoder);
+}
+
+
+#ifdef HAVE_ENCODER_ARM64
+extern lzma_ret
+lzma_simple_arm64_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return arm64_coder_init(next, allocator, filters, true);
+}
+#endif
+
+
+#ifdef HAVE_DECODER_ARM64
+extern lzma_ret
+lzma_simple_arm64_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return arm64_coder_init(next, allocator, filters, false);
+}
+#endif
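
A quick way to sanity-check the two conversions in arm64.c is a standalone round-trip test. The program below is only an illustration and not part of the patch: bl_convert() mirrors the BL branch of arm64_code() for a single instruction word, and the brute-force loop confirms that the branchless ADRP range check, (src + 0x00020000) & 0x001C0000, accepts exactly the same 21-bit immediates as the two-comparison form kept in the comment.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the BL branch of arm64_code() above for one instruction word.
 * pc is the byte position of the word in the stream; is_encoder selects
 * relative->absolute (compression) or absolute->relative (decompression). */
static uint32_t
bl_convert(uint32_t instr, uint32_t pc, int is_encoder)
{
	if ((instr >> 26) != 0x25)
		return instr;	/* not a BL instruction */

	pc >>= 2;
	if (!is_encoder)
		pc = 0U - pc;

	return 0x94000000 | ((instr + pc) & 0x03FFFFFF);
}

int
main(void)
{
	/* BL round trip: a backward branch of 16 instructions located
	 * 0x1234 bytes into the stream must come back unchanged after
	 * encoding and then decoding at the same position. */
	const uint32_t orig = 0x94000000 | (0x03FFFFFF & (uint32_t)-16);
	const uint32_t enc = bl_convert(orig, 0x1234, 1);
	const uint32_t dec = bl_convert(enc, 0x1234, 0);
	assert(dec == orig);

	/* ADRP range check: the branchless test used above accepts
	 * exactly the same 21-bit immediates as the two-comparison
	 * form kept in the comment. Verify by brute force. */
	for (uint32_t src = 0; src < (UINT32_C(1) << 21); ++src) {
		const int in_range = (src & 0x001E0000) == 0
				|| (src & 0x001E0000) == 0x001E0000;
		const int accepted =
				((src + 0x00020000) & 0x001C0000) == 0;
		assert(in_range == accepted);
	}

	puts("BL round trip and ADRP range check OK");
	return 0;
}
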
diff --git a/contrib/libs/lzma/liblzma/simple/armthumb.c b/contrib/libs/lzma/liblzma/simple/armthumb.c
index a8da334a04..25d8dbd4f3 100644
--- a/contrib/libs/lzma/liblzma/simple/armthumb.c
+++ b/contrib/libs/lzma/liblzma/simple/armthumb.c
@@ -58,6 +58,7 @@ armthumb_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
}
+#ifdef HAVE_ENCODER_ARMTHUMB
extern lzma_ret
lzma_simple_armthumb_encoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -65,8 +66,10 @@ lzma_simple_armthumb_encoder_init(lzma_next_coder *next,
{
return armthumb_coder_init(next, allocator, filters, true);
}
+#endif
+#ifdef HAVE_DECODER_ARMTHUMB
extern lzma_ret
lzma_simple_armthumb_decoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -74,3 +77,4 @@ lzma_simple_armthumb_decoder_init(lzma_next_coder *next,
{
return armthumb_coder_init(next, allocator, filters, false);
}
+#endif
diff --git a/contrib/libs/lzma/liblzma/simple/ia64.c b/contrib/libs/lzma/liblzma/simple/ia64.c
index 6492d0a384..692b0a295e 100644
--- a/contrib/libs/lzma/liblzma/simple/ia64.c
+++ b/contrib/libs/lzma/liblzma/simple/ia64.c
@@ -94,6 +94,7 @@ ia64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
}
+#ifdef HAVE_ENCODER_IA64
extern lzma_ret
lzma_simple_ia64_encoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -101,8 +102,10 @@ lzma_simple_ia64_encoder_init(lzma_next_coder *next,
{
return ia64_coder_init(next, allocator, filters, true);
}
+#endif
+#ifdef HAVE_DECODER_IA64
extern lzma_ret
lzma_simple_ia64_decoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -110,3 +113,4 @@ lzma_simple_ia64_decoder_init(lzma_next_coder *next,
{
return ia64_coder_init(next, allocator, filters, false);
}
+#endif
diff --git a/contrib/libs/lzma/liblzma/simple/powerpc.c b/contrib/libs/lzma/liblzma/simple/powerpc.c
index 0b60e9b3fe..3a340fd171 100644
--- a/contrib/libs/lzma/liblzma/simple/powerpc.c
+++ b/contrib/libs/lzma/liblzma/simple/powerpc.c
@@ -58,6 +58,7 @@ powerpc_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
}
+#ifdef HAVE_ENCODER_POWERPC
extern lzma_ret
lzma_simple_powerpc_encoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -65,8 +66,10 @@ lzma_simple_powerpc_encoder_init(lzma_next_coder *next,
{
return powerpc_coder_init(next, allocator, filters, true);
}
+#endif
+#ifdef HAVE_DECODER_POWERPC
extern lzma_ret
lzma_simple_powerpc_decoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -74,3 +77,4 @@ lzma_simple_powerpc_decoder_init(lzma_next_coder *next,
{
return powerpc_coder_init(next, allocator, filters, false);
}
+#endif
diff --git a/contrib/libs/lzma/liblzma/simple/simple_coder.h b/contrib/libs/lzma/liblzma/simple/simple_coder.h
index 19c2ee03af..668a5092ad 100644
--- a/contrib/libs/lzma/liblzma/simple/simple_coder.h
+++ b/contrib/libs/lzma/liblzma/simple/simple_coder.h
@@ -61,6 +61,15 @@ extern lzma_ret lzma_simple_armthumb_decoder_init(lzma_next_coder *next,
const lzma_filter_info *filters);
+extern lzma_ret lzma_simple_arm64_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+extern lzma_ret lzma_simple_arm64_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+
extern lzma_ret lzma_simple_sparc_encoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
const lzma_filter_info *filters);
diff --git a/contrib/libs/lzma/liblzma/simple/sparc.c b/contrib/libs/lzma/liblzma/simple/sparc.c
index 74b2655f36..bad8492ebc 100644
--- a/contrib/libs/lzma/liblzma/simple/sparc.c
+++ b/contrib/libs/lzma/liblzma/simple/sparc.c
@@ -65,6 +65,7 @@ sparc_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
}
+#ifdef HAVE_ENCODER_SPARC
extern lzma_ret
lzma_simple_sparc_encoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -72,8 +73,10 @@ lzma_simple_sparc_encoder_init(lzma_next_coder *next,
{
return sparc_coder_init(next, allocator, filters, true);
}
+#endif
+#ifdef HAVE_DECODER_SPARC
extern lzma_ret
lzma_simple_sparc_decoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -81,3 +84,4 @@ lzma_simple_sparc_decoder_init(lzma_next_coder *next,
{
return sparc_coder_init(next, allocator, filters, false);
}
+#endif
diff --git a/contrib/libs/lzma/liblzma/simple/x86.c b/contrib/libs/lzma/liblzma/simple/x86.c
index 0e78909ccc..232b29542e 100644
--- a/contrib/libs/lzma/liblzma/simple/x86.c
+++ b/contrib/libs/lzma/liblzma/simple/x86.c
@@ -141,6 +141,7 @@ x86_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
}
+#ifdef HAVE_ENCODER_X86
extern lzma_ret
lzma_simple_x86_encoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -148,8 +149,10 @@ lzma_simple_x86_encoder_init(lzma_next_coder *next,
{
return x86_coder_init(next, allocator, filters, true);
}
+#endif
+#ifdef HAVE_DECODER_X86
extern lzma_ret
lzma_simple_x86_decoder_init(lzma_next_coder *next,
const lzma_allocator *allocator,
@@ -157,3 +160,4 @@ lzma_simple_x86_decoder_init(lzma_next_coder *next,
{
return x86_coder_init(next, allocator, filters, false);
}
+#endif