aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoruzhas <uzhas@ydb.tech>2023-11-15 15:42:18 +0300
committeruzhas <uzhas@ydb.tech>2023-11-15 17:27:24 +0300
commit545d86327a12a599340eb9f5866b1637a9f61efc (patch)
treec29c68073592accbe0b1ddf1ab1b453ece0762a7
parentff4f46c88a1fe37f2ee668b9d9c75306ac973132 (diff)
downloadydb-545d86327a12a599340eb9f5866b1637a9f61efc.tar.gz
add compress base UDF
-rw-r--r--.mapping.json15
-rw-r--r--library/cpp/streams/CMakeLists.txt1
-rw-r--r--library/cpp/streams/xz/CMakeLists.darwin-x86_64.txt18
-rw-r--r--library/cpp/streams/xz/CMakeLists.linux-aarch64.txt19
-rw-r--r--library/cpp/streams/xz/CMakeLists.linux-x86_64.txt19
-rw-r--r--library/cpp/streams/xz/CMakeLists.txt17
-rw-r--r--library/cpp/streams/xz/CMakeLists.windows-x86_64.txt18
-rw-r--r--library/cpp/streams/xz/README.md9
-rw-r--r--library/cpp/streams/xz/decompress.cpp326
-rw-r--r--library/cpp/streams/xz/decompress.h40
-rw-r--r--library/cpp/streams/xz/ut/decompress_ut.cpp26
-rw-r--r--library/cpp/streams/xz/ut/ya.make11
-rw-r--r--library/cpp/streams/xz/ya.make15
-rw-r--r--ydb/library/yql/udfs/common/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/library/yql/udfs/common/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/library/yql/udfs/common/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/library/yql/udfs/common/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/library/yql/udfs/common/compress_base/CMakeLists.darwin-x86_64.txt67
-rw-r--r--ydb/library/yql/udfs/common/compress_base/CMakeLists.linux-aarch64.txt75
-rw-r--r--ydb/library/yql/udfs/common/compress_base/CMakeLists.linux-x86_64.txt75
-rw-r--r--ydb/library/yql/udfs/common/compress_base/CMakeLists.txt17
-rw-r--r--ydb/library/yql/udfs/common/compress_base/CMakeLists.windows-x86_64.txt60
-rw-r--r--ydb/library/yql/udfs/common/compress_base/compress_udf.cpp17
-rw-r--r--ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.darwin-x86_64.txt29
-rw-r--r--ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.linux-aarch64.txt30
-rw-r--r--ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.linux-x86_64.txt30
-rw-r--r--ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.txt17
-rw-r--r--ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.windows-x86_64.txt29
-rw-r--r--ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.cpp1
-rw-r--r--ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.h218
-rw-r--r--ydb/library/yql/udfs/common/compress_base/lib/ya.make23
-rw-r--r--ydb/library/yql/udfs/common/compress_base/test/canondata/result.json12
-rw-r--r--ydb/library/yql/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt124
-rw-r--r--ydb/library/yql/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt188
-rw-r--r--ydb/library/yql/udfs/common/compress_base/test/cases/RoundTrip.sql12
-rw-r--r--ydb/library/yql/udfs/common/compress_base/test/cases/TryDecompress.sql19
-rw-r--r--ydb/library/yql/udfs/common/compress_base/test/cases/default.in3
-rw-r--r--ydb/library/yql/udfs/common/compress_base/test/ya.make11
-rw-r--r--ydb/library/yql/udfs/common/compress_base/ya.make22
-rw-r--r--ydb/library/yql/udfs/common/ya.make1
40 files changed, 1618 insertions, 0 deletions
diff --git a/.mapping.json b/.mapping.json
index f655346d09..7a93a54fdb 100644
--- a/.mapping.json
+++ b/.mapping.json
@@ -2754,6 +2754,11 @@
"library/cpp/streams/lzma/CMakeLists.linux-x86_64.txt":"",
"library/cpp/streams/lzma/CMakeLists.txt":"",
"library/cpp/streams/lzma/CMakeLists.windows-x86_64.txt":"",
+ "library/cpp/streams/xz/CMakeLists.darwin-x86_64.txt":"",
+ "library/cpp/streams/xz/CMakeLists.linux-aarch64.txt":"",
+ "library/cpp/streams/xz/CMakeLists.linux-x86_64.txt":"",
+ "library/cpp/streams/xz/CMakeLists.txt":"",
+ "library/cpp/streams/xz/CMakeLists.windows-x86_64.txt":"",
"library/cpp/streams/zc_memory_input/CMakeLists.darwin-x86_64.txt":"",
"library/cpp/streams/zc_memory_input/CMakeLists.linux-aarch64.txt":"",
"library/cpp/streams/zc_memory_input/CMakeLists.linux-x86_64.txt":"",
@@ -8560,6 +8565,16 @@
"ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux-x86_64.txt":"",
"ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.txt":"",
"ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.windows-x86_64.txt":"",
+ "ydb/library/yql/udfs/common/compress_base/CMakeLists.darwin-x86_64.txt":"",
+ "ydb/library/yql/udfs/common/compress_base/CMakeLists.linux-aarch64.txt":"",
+ "ydb/library/yql/udfs/common/compress_base/CMakeLists.linux-x86_64.txt":"",
+ "ydb/library/yql/udfs/common/compress_base/CMakeLists.txt":"",
+ "ydb/library/yql/udfs/common/compress_base/CMakeLists.windows-x86_64.txt":"",
+ "ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.darwin-x86_64.txt":"",
+ "ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.linux-aarch64.txt":"",
+ "ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.linux-x86_64.txt":"",
+ "ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.txt":"",
+ "ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.windows-x86_64.txt":"",
"ydb/library/yql/udfs/common/datetime/CMakeLists.darwin-x86_64.txt":"",
"ydb/library/yql/udfs/common/datetime/CMakeLists.linux-aarch64.txt":"",
"ydb/library/yql/udfs/common/datetime/CMakeLists.linux-x86_64.txt":"",
diff --git a/library/cpp/streams/CMakeLists.txt b/library/cpp/streams/CMakeLists.txt
index 97c4ed1172..ae7ffe11fc 100644
--- a/library/cpp/streams/CMakeLists.txt
+++ b/library/cpp/streams/CMakeLists.txt
@@ -9,5 +9,6 @@
add_subdirectory(brotli)
add_subdirectory(bzip2)
add_subdirectory(lzma)
+add_subdirectory(xz)
add_subdirectory(zc_memory_input)
add_subdirectory(zstd)
diff --git a/library/cpp/streams/xz/CMakeLists.darwin-x86_64.txt b/library/cpp/streams/xz/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..30f9d34444
--- /dev/null
+++ b/library/cpp/streams/xz/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-streams-xz)
+target_link_libraries(cpp-streams-xz PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-lzma
+)
+target_sources(cpp-streams-xz PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/streams/xz/decompress.cpp
+)
diff --git a/library/cpp/streams/xz/CMakeLists.linux-aarch64.txt b/library/cpp/streams/xz/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..b7f0ce1cb8
--- /dev/null
+++ b/library/cpp/streams/xz/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-streams-xz)
+target_link_libraries(cpp-streams-xz PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-lzma
+)
+target_sources(cpp-streams-xz PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/streams/xz/decompress.cpp
+)
diff --git a/library/cpp/streams/xz/CMakeLists.linux-x86_64.txt b/library/cpp/streams/xz/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..b7f0ce1cb8
--- /dev/null
+++ b/library/cpp/streams/xz/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-streams-xz)
+target_link_libraries(cpp-streams-xz PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-lzma
+)
+target_sources(cpp-streams-xz PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/streams/xz/decompress.cpp
+)
diff --git a/library/cpp/streams/xz/CMakeLists.txt b/library/cpp/streams/xz/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/streams/xz/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/streams/xz/CMakeLists.windows-x86_64.txt b/library/cpp/streams/xz/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..30f9d34444
--- /dev/null
+++ b/library/cpp/streams/xz/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-streams-xz)
+target_link_libraries(cpp-streams-xz PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-lzma
+)
+target_sources(cpp-streams-xz PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/streams/xz/decompress.cpp
+)
diff --git a/library/cpp/streams/xz/README.md b/library/cpp/streams/xz/README.md
new file mode 100644
index 0000000000..edb344c0fb
--- /dev/null
+++ b/library/cpp/streams/xz/README.md
@@ -0,0 +1,9 @@
+XZ
+===
+`TXzDecompress` supports file formats:
+1) `.xz` - can be generated with the CLI tool `xz`. This format allows concatenating compressed files as is:
+```
+ echo foo | xz > foobar.xz
+ echo bar | xz >> foobar.xz
+```
+2) `.lzma` - can be generated with the CLI tool `lzma`. This is a legacy format, see https://fossies.org/linux/xz/README
diff --git a/library/cpp/streams/xz/decompress.cpp b/library/cpp/streams/xz/decompress.cpp
new file mode 100644
index 0000000000..361b3cff14
--- /dev/null
+++ b/library/cpp/streams/xz/decompress.cpp
@@ -0,0 +1,326 @@
+#include "decompress.h"
+
+#include <contrib/libs/lzma/liblzma/api/lzma.h>
+
+#include <util/generic/yexception.h>
+#include <util/stream/output.h>
+#include <util/stream/str.h>
+#include <util/stream/zerocopy.h>
+
+// Based on https://fossies.org/linux/xz/doc/examples/02_decompress.c
+
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file 02_decompress.c
+/// \brief Decompress .xz files to stdout
+///
+/// Usage: ./02_decompress INPUT_FILES... > OUTFILE
+///
+/// Example: ./02_decompress foo.xz bar.xz > foobar
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+namespace {
+    /// Uniform source of compressed input chunks for the decoder.
+    class IInput {
+    public:
+        virtual ~IInput() = default;
+
+        /// Points `ptr` at the next chunk of compressed input and returns its
+        /// size in bytes. The decoder treats a return value of 0 as end of input.
+        virtual size_t Next(const ui8*& ptr) = 0;
+    };
+
+    /// Adapter for plain input streams: reads data into an internal buffer.
+    class TCopyInput final: public IInput {
+    public:
+        explicit TCopyInput(IInputStream* slave)
+            : Slave_(slave)
+        {
+        }
+
+        size_t Next(const ui8*& ptr) override {
+            // The chunk lives in Inbuf_, so it is overwritten by the next call.
+            ptr = Inbuf_;
+            return Slave_->Read(Inbuf_, sizeof(Inbuf_));
+        }
+
+    private:
+        IInputStream* Slave_; // stored as a raw pointer, never deleted here
+        ui8 Inbuf_[4096];
+    };
+
+    /// Adapter for zero-copy streams: hands out the stream's own chunks
+    /// without copying them.
+    class TZeroCopy final: public IInput {
+    public:
+        explicit TZeroCopy(IZeroCopyInput* slave)
+            : Slave_(slave)
+        {
+        }
+
+        size_t Next(const ui8*& ptr) override {
+            return Slave_->Next(&ptr);
+        }
+
+    private:
+        IZeroCopyInput* Slave_; // stored as a raw pointer, never deleted here
+    };
+
+    /// Creates the copying adapter for a plain input stream.
+    std::unique_ptr<IInput> createInput(IInputStream* slave) {
+        return std::make_unique<TCopyInput>(slave);
+    }
+
+    /// Creates the zero-copy adapter for a zero-copy input stream.
+    std::unique_ptr<IInput> createInput(IZeroCopyInput* slave) {
+        return std::make_unique<TZeroCopy>(slave);
+    }
+}
+
+/// Implementation of TUnbufferedXzDecompress: owns the liblzma decoder state
+/// and pulls compressed bytes from the wrapped stream on demand.
+class TUnbufferedXzDecompress::TImpl {
+public:
+    /// Wraps `slave` in the matching IInput adapter and initializes the decoder.
+    /// Throws (via Y_ENSURE) if the decoder cannot be initialized.
+    template <class T>
+    explicit TImpl(T* slave)
+        : Input_(createInput(slave))
+        , Strm_(LZMA_STREAM_INIT)
+    {
+        TString err;
+        Y_ENSURE(initDecoder(&Strm_, err),
+                 "Error initializing the decoder: " << err);
+        Strm_.next_in = nullptr;
+        Strm_.avail_in = 0;
+    }
+
+    ~TImpl() {
+        // Free the memory allocated for the decoder
+        lzma_end(&Strm_);
+    }
+
+    /// Decompresses up to `len` bytes into `buf` and returns the number of
+    /// bytes written. Returns 0 once the decoder has reported the end of the
+    /// stream. Throws (via Y_ENSURE) on a decoder error.
+    size_t DoRead(void* buf, size_t len) {
+        if (IsOutFinished_) {
+            return 0;
+        }
+
+        size_t res = 0;
+        TString err;
+
+        Y_ENSURE(decompress(buf, len, res, err),
+                 "lzma decoder error: " << err);
+
+        return res;
+    }
+
+private:
+    /// Runs the decoder until `buf` is full or the stream ends.
+    /// On success returns true and stores the number of produced bytes in
+    /// `outLen`; on failure returns false and stores a description in `err`.
+    bool decompress(void* buf, size_t len, size_t& outLen, TString& err) {
+        // When LZMA_CONCATENATED flag was used when initializing the decoder,
+        // we need to tell lzma_code() when there will be no more input.
+        // This is done by setting action to LZMA_FINISH instead of LZMA_RUN
+        // in the same way as it is done when encoding.
+        //
+        // When LZMA_CONCATENATED isn't used, there is no need to use
+        // LZMA_FINISH to tell when all the input has been read, but it
+        // is still OK to use it if you want. When LZMA_CONCATENATED isn't
+        // used, the decoder will stop after the first .xz stream. In that
+        // case some unused data may be left in strm->next_in.
+        //
+        // Once LZMA_FINISH has been passed to lzma_code(), liblzma requires
+        // every following call to keep passing LZMA_FINISH; switching back
+        // to LZMA_RUN is reported as LZMA_PROG_ERROR. This method may return
+        // early because the output buffer is full, so the action is derived
+        // from the persistent IsInFinished_ flag instead of being reset to
+        // LZMA_RUN on every call.
+        lzma_action action = IsInFinished_ ? LZMA_FINISH : LZMA_RUN;
+
+        Strm_.next_out = static_cast<ui8*>(buf);
+        Strm_.avail_out = len;
+
+        while (true) {
+            if (Strm_.avail_in == 0 && !IsInFinished_) {
+                size_t size = Input_->Next(Strm_.next_in);
+
+                if (size == 0) {
+                    IsInFinished_ = true;
+                } else {
+                    Strm_.avail_in = size;
+                }
+
+                // Once the end of the input file has been reached,
+                // we need to tell lzma_code() that no more input
+                // will be coming. As said before, this isn't required
+                // if the LZMA_CONCATENATED flag isn't used when
+                // initializing the decoder.
+                if (IsInFinished_)
+                    action = LZMA_FINISH;
+            }
+
+            lzma_ret ret = lzma_code(&Strm_, action);
+
+            if (ret == LZMA_STREAM_END) {
+                // Once everything has been decoded successfully, the
+                // return value of lzma_code() will be LZMA_STREAM_END.
+                //
+                // It is important to check for LZMA_STREAM_END. Do not
+                // assume that getting ret != LZMA_OK would mean that
+                // everything has gone well or that when you aren't
+                // getting more output it must have successfully
+                // decoded everything.
+                IsOutFinished_ = true;
+            }
+
+            if (Strm_.avail_out == 0 || ret == LZMA_STREAM_END) {
+                outLen = len - Strm_.avail_out;
+                return true;
+            }
+
+            if (ret != LZMA_OK) {
+                // It's not LZMA_OK nor LZMA_STREAM_END,
+                // so it must be an error code. See lzma/base.h
+                // (src/liblzma/api/lzma/base.h in the source package
+                // or e.g. /usr/include/lzma/base.h depending on the
+                // install prefix) for the list and documentation of
+                // possible values. Many values listed in lzma_ret
+                // enumeration aren't possible in this example, but
+                // can be made possible by enabling memory usage limit
+                // or adding flags to the decoder initialization.
+                switch (ret) {
+                    case LZMA_MEM_ERROR:
+                        err = "Memory allocation failed";
+                        break;
+
+                    case LZMA_FORMAT_ERROR:
+                        // .xz magic bytes weren't found.
+                        err = "The input is not in the .xz format";
+                        break;
+
+                    case LZMA_OPTIONS_ERROR:
+                        // For example, the headers specify a filter
+                        // that isn't supported by this liblzma
+                        // version (or it hasn't been enabled when
+                        // building liblzma, but no-one sane does
+                        // that unless building liblzma for an
+                        // embedded system). Upgrading to a newer
+                        // liblzma might help.
+                        //
+                        // Note that it is unlikely that the file has
+                        // accidentally become corrupt if you get this
+                        // error. The integrity of the .xz headers is
+                        // always verified with a CRC32, so
+                        // unintentionally corrupt files can be
+                        // distinguished from unsupported files.
+                        err = "Unsupported compression options";
+                        break;
+
+                    case LZMA_DATA_ERROR:
+                        err = "Compressed file is corrupt";
+                        break;
+
+                    case LZMA_BUF_ERROR:
+                        // Typically this error means that a valid
+                        // file has got truncated, but it might also
+                        // be a damaged part in the file that makes
+                        // the decoder think the file is truncated.
+                        // If you prefer, you can use the same error
+                        // message for this as for LZMA_DATA_ERROR.
+                        err = "Compressed file is truncated or "
+                              "otherwise corrupt";
+                        break;
+
+                    default:
+                        // This is most likely LZMA_PROG_ERROR.
+                        err = "Unknown error, possibly a bug";
+                        break;
+                }
+
+                // Append the numeric liblzma return code to the message.
+                TStringOutput out(err);
+                out << "[" << static_cast<int>(ret) << "]";
+                return false;
+            }
+        }
+    }
+
+    /// Initializes `strm` as an auto-detecting decoder with LZMA_CONCATENATED
+    /// and no memory limit. Returns true on success; otherwise returns false
+    /// and stores a description in `err`.
+    static bool initDecoder(lzma_stream* strm, TString& err) {
+        // Initialize a .xz decoder. The decoder supports a memory usage limit
+        // and a set of flags.
+        //
+        // The memory usage of the decompressor depends on the settings used
+        // to compress a .xz file. It can vary from less than a megabyte to
+        // a few gigabytes, but in practice (at least for now) it rarely
+        // exceeds 65 MiB because that's how much memory is required to
+        // decompress files created with "xz -9". Settings requiring more
+        // memory take extra effort to use and don't (at least for now)
+        // provide significantly better compression in most cases.
+        //
+        // Memory usage limit is useful if it is important that the
+        // decompressor won't consume gigabytes of memory. The need
+        // for limiting depends on the application. In this example,
+        // no memory usage limiting is used. This is done by setting
+        // the limit to UINT64_MAX.
+        //
+        // The .xz format allows concatenating compressed files as is:
+        //
+        //     echo foo | xz > foobar.xz
+        //     echo bar | xz >> foobar.xz
+        //
+        // When decompressing normal standalone .xz files, LZMA_CONCATENATED
+        // should always be used to support decompression of concatenated
+        // .xz files. If LZMA_CONCATENATED isn't used, the decoder will stop
+        // after the first .xz stream. This can be useful when .xz data has
+        // been embedded inside another file format.
+        //
+        // Flags other than LZMA_CONCATENATED are supported too, and can
+        // be combined with bitwise-or. See lzma/container.h
+        // (src/liblzma/api/lzma/container.h in the source package or e.g.
+        // /usr/include/lzma/container.h depending on the install prefix)
+        // for details.
+        lzma_ret ret = lzma_auto_decoder(
+            strm, UINT64_MAX, LZMA_CONCATENATED);
+
+        // Return successfully if the initialization went fine.
+        if (ret == LZMA_OK)
+            return true;
+
+        // Something went wrong. The possible errors are documented in
+        // lzma/container.h (src/liblzma/api/lzma/container.h in the source
+        // package or e.g. /usr/include/lzma/container.h depending on the
+        // install prefix).
+        //
+        // Note that LZMA_MEMLIMIT_ERROR is never possible here. If you
+        // specify a very tiny limit, the error will be delayed until
+        // the first headers have been parsed by a call to lzma_code().
+        switch (ret) {
+            case LZMA_MEM_ERROR:
+                err = "Memory allocation failed";
+                break;
+
+            case LZMA_OPTIONS_ERROR:
+                err = "Unsupported decompressor flags";
+                break;
+
+            default:
+                // This is most likely LZMA_PROG_ERROR indicating a bug in
+                // this program or in liblzma. It is inconvenient to have a
+                // separate error message for errors that should be impossible
+                // to occur, but knowing the error code is important for
+                // debugging. That's why it is good to print the error code
+                // at least when there is no good error message to show.
+                err = "Unknown error, possibly a bug";
+                break;
+        }
+
+        // Append the numeric liblzma return code to the message.
+        TStringOutput out(err);
+        out << "[" << static_cast<int>(ret) << "]";
+        return false;
+    }
+
+private:
+    std::unique_ptr<IInput> Input_;
+    lzma_stream Strm_;
+
+    // Set once Input_ has returned 0 bytes; from then on LZMA_FINISH is used.
+    bool IsInFinished_ = false;
+    // Set once lzma_code() has returned LZMA_STREAM_END.
+    bool IsOutFinished_ = false;
+};
+
+TUnbufferedXzDecompress::TUnbufferedXzDecompress(IInputStream* slave)
+ : Impl_(std::make_unique<TImpl>(slave))
+{
+}
+
+TUnbufferedXzDecompress::TUnbufferedXzDecompress(IZeroCopyInput* slave)
+ : Impl_(std::make_unique<TImpl>(slave))
+{
+}
+
+// Defined in this file rather than in the header: destroying Impl_ requires the complete TImpl type.
+TUnbufferedXzDecompress::~TUnbufferedXzDecompress() = default;
+
+// Forwards the read request to the implementation object.
+size_t TUnbufferedXzDecompress::DoRead(void* buf, size_t len) {
+    const size_t bytesRead = Impl_->DoRead(buf, len);
+    return bytesRead;
+}
diff --git a/library/cpp/streams/xz/decompress.h b/library/cpp/streams/xz/decompress.h
new file mode 100644
index 0000000000..8389cbdaf9
--- /dev/null
+++ b/library/cpp/streams/xz/decompress.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <util/stream/buffered.h>
+#include <util/stream/input.h>
+
+class IZeroCopyInput;
+
+/**
+ * Unbuffered decompressing stream for .XZ and .LZMA files.
+ *
+ * Do not use it for reading in small pieces.
+ */
+class TUnbufferedXzDecompress: public IInputStream {
+public:
+    // `slave` is the source of compressed data.
+    // NOTE(review): the pointer is presumably not owned and must outlive this object - confirm.
+    TUnbufferedXzDecompress(IInputStream* slave);
+    TUnbufferedXzDecompress(IZeroCopyInput* slave);
+    ~TUnbufferedXzDecompress() override;
+
+private:
+    // Writes up to `len` decompressed bytes into `buf` and returns the number written.
+    size_t DoRead(void* buf, size_t len) override;
+
+private:
+    // Only forward-declared here; the decoder details live behind this pointer.
+    class TImpl;
+    std::unique_ptr<TImpl> Impl_;
+};
+
+/**
+ * Buffered decompressing stream for .XZ and .LZMA files.
+ *
+ * Supports efficient `ReadLine` calls and similar "reading in small pieces"
+ * usage patterns.
+ */
+class TXzDecompress: public TBuffered<TUnbufferedXzDecompress> {
+public:
+    // `t` is forwarded to the TUnbufferedXzDecompress constructor;
+    // `buf` is the buffer size passed to TBuffered (8192 by default).
+    template <class T>
+    TXzDecompress(T&& t, size_t buf = 1 << 13)
+        : TBuffered<TUnbufferedXzDecompress>(buf, std::forward<T>(t)) {
+    }
+};
diff --git a/library/cpp/streams/xz/ut/decompress_ut.cpp b/library/cpp/streams/xz/ut/decompress_ut.cpp
new file mode 100644
index 0000000000..2ebeca4e85
--- /dev/null
+++ b/library/cpp/streams/xz/ut/decompress_ut.cpp
@@ -0,0 +1,26 @@
+#include <library/cpp/streams/xz/decompress.h>
+
+#include <library/cpp/string_utils/base64/base64.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(XzDecompress) {
+    // Feeds a base64-encoded compressed payload through TXzDecompress and
+    // checks that the whole decompressed text matches the expected log lines.
+    Y_UNIT_TEST(decompress) {
+        TStringStream in;
+        in << Base64Decode("/Td6WFoAAATm1rRGAgAhARYAAAB0L+Wj4ANQAUVdABkMHARbg7qMApbl/qwEvrgQKpvF7Rbp/QJJdquZ88M3I5x3ANhSSpxvtnSoyPDeC6M8vz0vNKiOCsbIqvsGIwxrx+6YNqT87gDxVS8S3fHeoAZTf+zbg1DpDtv7Xh7Q3ug24wxNbPMi2p+WAo3V0LAi+lGUQmA44nJlabRv0XZ5CWhwgYtEWrrbPxoFjONeCa4p5BoX+TVgWegToFQMeJhVXMbDGWOIFL56X/F7nDJ47pjAy2GJIHHI5W/wrGH6uB0TCwpudW96peQaEgwMSZE07PfPE+XkfEymxhkxTs5Mnpc2rmQCiZ+3I6PqP+Qj8fuqaxb0fAJPQrbWYsqqeXP/3VNOeDRk+Szr9H3TMGI6yepUgkrgqNpaIYYcbxTU43eofcnTdwdsgi8fpH99tx3rrKq4zveStkZgZQqeY+MCvineIAAAAAAA2X8RUfmPU3kAAeEC0QYAANTt6P6xxGf7AgAAAAAEWVo=");
+
+        TXzDecompress xz(&in);
+
+        UNIT_ASSERT_VALUES_EQUAL(
+            xz.ReadAll(),
+            "2020-08-27T18:22:02.332 INFO: Starting blackbox module\n"
+            "2020-08-27T18:22:02.850 INFO: Init libauth (root kspace=<yandex_ru>, sign with key #49)\n"
+            "2020-08-27T18:22:02.851 DEBUG: KeyRing: randoms: table name loaded. Took 0.000918s\n"
+            "2020-08-27T18:22:02.853 DEBUG: KeyRing: randoms: min-max key id loaded. Took 0.001249s\n"
+            "2020-08-27T18:22:02.863 DEBUG: KeyRing: randoms: new keys loaded. Took 0.010837s\n"
+            "2020-08-27T18:22:02.865 DEBUG: Loaded 2389 new key(s) for keyspace 'yandex_ru'. Key ids: 330589-335364\n"
+            "2020-08-27T18:22:02.866 INFO: Attempt to load second time for spacename yandex_ru\n"
+            "2020-08-27T18:22:02.867 DEBUG: KeyRing: randoms_ua: table name loaded. Took 0.000926s\n"
+            "2020-08-27T18:22:02.868 DEBUG: KeyRing: randoms_ua: min-max key id loaded. Took 0.001212s\n"
+            "2020-08-27T18:22:02.871 DEBUG: KeyRing: randoms_ua: new keys loaded. Took 0.003202s\n");
+    }
+}
diff --git a/library/cpp/streams/xz/ut/ya.make b/library/cpp/streams/xz/ut/ya.make
new file mode 100644
index 0000000000..4879e7ccb2
--- /dev/null
+++ b/library/cpp/streams/xz/ut/ya.make
@@ -0,0 +1,11 @@
+UNITTEST_FOR(library/cpp/streams/xz)
+
+PEERDIR(
+ library/cpp/string_utils/base64
+)
+
+SRCS(
+ decompress_ut.cpp
+)
+
+END()
diff --git a/library/cpp/streams/xz/ya.make b/library/cpp/streams/xz/ya.make
new file mode 100644
index 0000000000..1e1c0b76fd
--- /dev/null
+++ b/library/cpp/streams/xz/ya.make
@@ -0,0 +1,15 @@
+LIBRARY()
+
+PEERDIR(
+ contrib/libs/lzma
+)
+
+SRCS(
+ decompress.cpp
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/ydb/library/yql/udfs/common/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/udfs/common/CMakeLists.darwin-x86_64.txt
index 4ddd9b3e37..01d74fdba1 100644
--- a/ydb/library/yql/udfs/common/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/udfs/common/CMakeLists.darwin-x86_64.txt
@@ -7,6 +7,7 @@
add_subdirectory(clickhouse)
+add_subdirectory(compress_base)
add_subdirectory(datetime)
add_subdirectory(datetime2)
add_subdirectory(digest)
diff --git a/ydb/library/yql/udfs/common/CMakeLists.linux-aarch64.txt b/ydb/library/yql/udfs/common/CMakeLists.linux-aarch64.txt
index 9cf6fa9c4f..d72ac518fa 100644
--- a/ydb/library/yql/udfs/common/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/udfs/common/CMakeLists.linux-aarch64.txt
@@ -7,6 +7,7 @@
add_subdirectory(clickhouse)
+add_subdirectory(compress_base)
add_subdirectory(datetime)
add_subdirectory(datetime2)
add_subdirectory(digest)
diff --git a/ydb/library/yql/udfs/common/CMakeLists.linux-x86_64.txt b/ydb/library/yql/udfs/common/CMakeLists.linux-x86_64.txt
index 4ddd9b3e37..01d74fdba1 100644
--- a/ydb/library/yql/udfs/common/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/udfs/common/CMakeLists.linux-x86_64.txt
@@ -7,6 +7,7 @@
add_subdirectory(clickhouse)
+add_subdirectory(compress_base)
add_subdirectory(datetime)
add_subdirectory(datetime2)
add_subdirectory(digest)
diff --git a/ydb/library/yql/udfs/common/CMakeLists.windows-x86_64.txt b/ydb/library/yql/udfs/common/CMakeLists.windows-x86_64.txt
index 4ddd9b3e37..01d74fdba1 100644
--- a/ydb/library/yql/udfs/common/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/udfs/common/CMakeLists.windows-x86_64.txt
@@ -7,6 +7,7 @@
add_subdirectory(clickhouse)
+add_subdirectory(compress_base)
add_subdirectory(datetime)
add_subdirectory(datetime2)
add_subdirectory(digest)
diff --git a/ydb/library/yql/udfs/common/compress_base/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/udfs/common/compress_base/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..0bca745076
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,67 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(lib)
+
+add_shared_library(compress_udf.dyn)
+set_property(TARGET compress_udf.dyn PROPERTY
+ OUTPUT_NAME compress_udf
+)
+target_compile_options(compress_udf.dyn PRIVATE
+ -DBUILD_UDF
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(compress_udf.dyn PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+target_link_options(compress_udf.dyn PRIVATE
+ -Wl,-platform_version,macos,11.0,11.0
+ -fPIC
+ -undefined
+ dynamic_lookup
+ -fPIC
+)
+target_sources(compress_udf.dyn PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp
+)
+use_export_script(compress_udf.dyn
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/udfs_exports.exports
+)
+vcs_info(compress_udf.dyn)
+
+add_library(compress_udf INTERFACE)
+target_link_libraries(compress_udf INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+
+add_global_library_for(compress_udf.global compress_udf)
+target_compile_options(compress_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(compress_udf.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+target_sources(compress_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp
+)
diff --git a/ydb/library/yql/udfs/common/compress_base/CMakeLists.linux-aarch64.txt b/ydb/library/yql/udfs/common/compress_base/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..5582b9ac18
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,75 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(lib)
+
+add_shared_library(compress_udf.dyn)
+set_property(TARGET compress_udf.dyn PROPERTY
+ OUTPUT_NAME compress_udf
+)
+target_compile_options(compress_udf.dyn PRIVATE
+ -DBUILD_UDF
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(compress_udf.dyn PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+target_link_options(compress_udf.dyn PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -Wl,-z,notext
+ -Wl,-Bsymbolic
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(compress_udf.dyn PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp
+)
+use_export_script(compress_udf.dyn
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/udfs_exports.exports
+)
+vcs_info(compress_udf.dyn)
+
+add_library(compress_udf INTERFACE)
+target_link_libraries(compress_udf INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+
+add_global_library_for(compress_udf.global compress_udf)
+target_compile_options(compress_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(compress_udf.global PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+target_sources(compress_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp
+)
diff --git a/ydb/library/yql/udfs/common/compress_base/CMakeLists.linux-x86_64.txt b/ydb/library/yql/udfs/common/compress_base/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..5582b9ac18
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,75 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(lib)
+
+add_shared_library(compress_udf.dyn)
+set_property(TARGET compress_udf.dyn PROPERTY
+ OUTPUT_NAME compress_udf
+)
+target_compile_options(compress_udf.dyn PRIVATE
+ -DBUILD_UDF
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(compress_udf.dyn PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+target_link_options(compress_udf.dyn PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -Wl,-z,notext
+ -Wl,-Bsymbolic
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(compress_udf.dyn PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp
+)
+use_export_script(compress_udf.dyn
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/udfs_exports.exports
+)
+vcs_info(compress_udf.dyn)
+
+add_library(compress_udf INTERFACE)
+target_link_libraries(compress_udf INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+
+add_global_library_for(compress_udf.global compress_udf)
+target_compile_options(compress_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(compress_udf.global PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+target_sources(compress_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp
+)
diff --git a/ydb/library/yql/udfs/common/compress_base/CMakeLists.txt b/ydb/library/yql/udfs/common/compress_base/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/udfs/common/compress_base/CMakeLists.windows-x86_64.txt b/ydb/library/yql/udfs/common/compress_base/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..6aa0484035
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,60 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(lib)
+
+add_shared_library(compress_udf.dyn)
+set_property(TARGET compress_udf.dyn PROPERTY
+ OUTPUT_NAME compress_udf
+)
+target_compile_options(compress_udf.dyn PRIVATE
+ -DBUILD_UDF
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(compress_udf.dyn PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+target_sources(compress_udf.dyn PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp
+)
+use_export_script(compress_udf.dyn
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/udfs_exports.exports
+)
+vcs_info(compress_udf.dyn)
+
+add_library(compress_udf INTERFACE)
+target_link_libraries(compress_udf INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+
+add_global_library_for(compress_udf.global compress_udf)
+target_compile_options(compress_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(compress_udf.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ common-compress_base-lib
+)
+target_sources(compress_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp
+)
diff --git a/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp b/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp
new file mode 100644
index 0000000000..efd2d0b3c5
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/compress_udf.cpp
@@ -0,0 +1,17 @@
+#include "lib/compress_base_udf.h"
+
+using namespace NYql::NUdf;
+
+// Each SIMPLE_MODULE below is placed inside the namespace that holds the
+// matching UDF classes: the EXPORTED_*_BASE_UDF lists from
+// lib/compress_base_udf.h contain unqualified names (TGzip, TZlib, ...), and
+// NCompress, NDecompress and NTryDecompress each define their own classes
+// with those names, so the surrounding namespace selects which set is used.
+// NOTE(review): the SQL test cases call these as Compress::, Decompress:: and
+// TryDecompress::, so the module name is presumably derived from the class
+// name (TCompressModule -> Compress) - confirm against udf_helpers.h.
+namespace NCompress {
+ // Module over the compression UDFs listed in EXPORTED_COMPRESS_BASE_UDF.
+ SIMPLE_MODULE(TCompressModule, EXPORTED_COMPRESS_BASE_UDF);
+}
+
+namespace NDecompress {
+ // Module over the decompression UDFs listed in EXPORTED_DECOMPRESS_BASE_UDF.
+ SIMPLE_MODULE(TDecompressModule, EXPORTED_DECOMPRESS_BASE_UDF);
+}
+
+namespace NTryDecompress {
+ // Module over the Optional-returning decompression UDFs listed in
+ // EXPORTED_TRY_DECOMPRESS_BASE_UDF.
+ SIMPLE_MODULE(TTryDecompressModule, EXPORTED_TRY_DECOMPRESS_BASE_UDF);
+}
+
+// Hands all three module classes to REGISTER_MODULES in a single invocation.
+REGISTER_MODULES(NCompress::TCompressModule, NDecompress::TDecompressModule, NTryDecompress::TTryDecompressModule);
diff --git a/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..81d01df40f
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,29 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(common-compress_base-lib)
+target_compile_options(common-compress_base-lib PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(common-compress_base-lib PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ contrib-libs-snappy
+ cpp-streams-brotli
+ cpp-streams-bzip2
+ cpp-streams-lzma
+ cpp-streams-xz
+ cpp-streams-zstd
+)
+target_sources(common-compress_base-lib PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.cpp
+)
diff --git a/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.linux-aarch64.txt b/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..5ff203cc5f
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,30 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(common-compress_base-lib)
+target_compile_options(common-compress_base-lib PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(common-compress_base-lib PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ contrib-libs-snappy
+ cpp-streams-brotli
+ cpp-streams-bzip2
+ cpp-streams-lzma
+ cpp-streams-xz
+ cpp-streams-zstd
+)
+target_sources(common-compress_base-lib PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.cpp
+)
diff --git a/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.linux-x86_64.txt b/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..5ff203cc5f
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,30 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(common-compress_base-lib)
+target_compile_options(common-compress_base-lib PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(common-compress_base-lib PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ contrib-libs-snappy
+ cpp-streams-brotli
+ cpp-streams-bzip2
+ cpp-streams-lzma
+ cpp-streams-xz
+ cpp-streams-zstd
+)
+target_sources(common-compress_base-lib PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.cpp
+)
diff --git a/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.txt b/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.windows-x86_64.txt b/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..81d01df40f
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/lib/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,29 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(common-compress_base-lib)
+target_compile_options(common-compress_base-lib PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=23
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(common-compress_base-lib PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ contrib-libs-snappy
+ cpp-streams-brotli
+ cpp-streams-bzip2
+ cpp-streams-lzma
+ cpp-streams-xz
+ cpp-streams-zstd
+)
+target_sources(common-compress_base-lib PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.cpp
+)
diff --git a/ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.cpp b/ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.cpp
new file mode 100644
index 0000000000..237abe271e
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.cpp
@@ -0,0 +1 @@
+#include "compress_base_udf.h"
\ No newline at end of file
diff --git a/ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.h b/ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.h
new file mode 100644
index 0000000000..621c84f581
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/lib/compress_base_udf.h
@@ -0,0 +1,218 @@
+#pragma once
+
+#include <ydb/library/yql/public/udf/udf_helpers.h>
+
+#include <library/cpp/streams/brotli/brotli.h>
+#include <library/cpp/streams/bzip2/bzip2.h>
+#include <library/cpp/streams/zstd/zstd.h>
+#include <library/cpp/streams/lzma/lzma.h>
+#include <library/cpp/streams/xz/decompress.h>
+
+#include <util/stream/mem.h>
+#include <util/stream/zlib.h>
+
+#include <contrib/libs/snappy/snappy.h>
+
+using namespace NYql::NUdf;
+
+namespace NCompress {
+    // Every UDF in this namespace receives the string to compress as args[0].
+    // All of them except TSnappy also receive a ui8 in args[1], which is
+    // forwarded to the codec's compressor constructor.
+
+    // Compresses args[0] with TZLibCompress using the ZLib::GZip stream type.
+    SIMPLE_UDF(TGzip, char*(TAutoMap<char*>, ui8)) {
+        const ui8 level = args[1].Get<ui8>();
+        TString packed;
+        TStringOutput sink(packed);
+        TZLibCompress encoder(&sink, ZLib::GZip, level);
+        encoder.Write(args[0].AsStringRef());
+        encoder.Finish();
+        return valueBuilder->NewString(packed);
+    }
+
+    // Compresses args[0] with TZLibCompress using the ZLib::ZLib stream type.
+    SIMPLE_UDF(TZlib, char*(TAutoMap<char*>, ui8)) {
+        const ui8 level = args[1].Get<ui8>();
+        TString packed;
+        TStringOutput sink(packed);
+        TZLibCompress encoder(&sink, ZLib::ZLib, level);
+        encoder.Write(args[0].AsStringRef());
+        encoder.Finish();
+        return valueBuilder->NewString(packed);
+    }
+
+    // Compresses args[0] with TBrotliCompress.
+    SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>, ui8)) {
+        const ui8 level = args[1].Get<ui8>();
+        TString packed;
+        TStringOutput sink(packed);
+        TBrotliCompress encoder(&sink, level);
+        encoder.Write(args[0].AsStringRef());
+        encoder.Finish();
+        return valueBuilder->NewString(packed);
+    }
+
+    // Compresses args[0] with TLzmaCompress.
+    SIMPLE_UDF(TLzma, char*(TAutoMap<char*>, ui8)) {
+        const ui8 level = args[1].Get<ui8>();
+        TString packed;
+        TStringOutput sink(packed);
+        TLzmaCompress encoder(&sink, level);
+        encoder.Write(args[0].AsStringRef());
+        encoder.Finish();
+        return valueBuilder->NewString(packed);
+    }
+
+    // Compresses args[0] with TBZipCompress.
+    SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>, ui8)) {
+        const ui8 level = args[1].Get<ui8>();
+        TString packed;
+        TStringOutput sink(packed);
+        TBZipCompress encoder(&sink, level);
+        encoder.Write(args[0].AsStringRef());
+        encoder.Finish();
+        return valueBuilder->NewString(packed);
+    }
+
+    // Compresses args[0] with snappy::Compress; this UDF has no level argument.
+    SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) {
+        const TStringRef& raw = args[0].AsStringRef();
+        TString packed;
+        snappy::Compress(raw.Data(), raw.Size(), &packed);
+        return valueBuilder->NewString(packed);
+    }
+
+    // Compresses args[0] with TZstdCompress.
+    SIMPLE_UDF(TZstd, char*(TAutoMap<char*>, ui8)) {
+        const ui8 level = args[1].Get<ui8>();
+        TString packed;
+        TStringOutput sink(packed);
+        TZstdCompress encoder(&sink, level);
+        encoder.Write(args[0].AsStringRef());
+        encoder.Finish();
+        return valueBuilder->NewString(packed);
+    }
+}
+
+namespace NDecompress {
+    // Every UDF in this namespace takes the compressed string as its only
+    // argument and returns the decompressed bytes as a new string. Nothing is
+    // caught here, so whatever a decoder throws leaves the UDF.
+
+    // Decompresses through TZLibDecompress (same decoder setup as TZlib below).
+    SIMPLE_UDF(TGzip, char*(TAutoMap<char*>)) {
+        const auto& packed = args[0].AsStringRef();
+        TMemoryInput source(packed.Data(), packed.Size());
+        TZLibDecompress decoder(&source);
+        return valueBuilder->NewString(decoder.ReadAll());
+    }
+
+    // Decompresses through TZLibDecompress (same decoder setup as TGzip above).
+    SIMPLE_UDF(TZlib, char*(TAutoMap<char*>)) {
+        const auto& packed = args[0].AsStringRef();
+        TMemoryInput source(packed.Data(), packed.Size());
+        TZLibDecompress decoder(&source);
+        return valueBuilder->NewString(decoder.ReadAll());
+    }
+
+    // Decompresses through TBrotliDecompress.
+    SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>)) {
+        const auto& packed = args[0].AsStringRef();
+        TMemoryInput source(packed.Data(), packed.Size());
+        TBrotliDecompress decoder(&source);
+        return valueBuilder->NewString(decoder.ReadAll());
+    }
+
+    // Decompresses through TLzmaDecompress.
+    SIMPLE_UDF(TLzma, char*(TAutoMap<char*>)) {
+        const auto& packed = args[0].AsStringRef();
+        TMemoryInput source(packed.Data(), packed.Size());
+        TLzmaDecompress decoder(&source);
+        return valueBuilder->NewString(decoder.ReadAll());
+    }
+
+    // Decompresses through TBZipDecompress.
+    SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>)) {
+        const auto& packed = args[0].AsStringRef();
+        TMemoryInput source(packed.Data(), packed.Size());
+        TBZipDecompress decoder(&source);
+        return valueBuilder->NewString(decoder.ReadAll());
+    }
+
+    // Decompresses with snappy::Uncompress; a false return is turned into a
+    // yexception.
+    SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) {
+        TString plain;
+        const auto& packed = args[0].AsStringRef();
+        if (!snappy::Uncompress(packed.Data(), packed.Size(), &plain)) {
+            ythrow yexception() << "failed to decompress message with snappy";
+        }
+
+        return valueBuilder->NewString(plain);
+    }
+
+    // Decompresses through TZstdDecompress.
+    SIMPLE_UDF(TZstd, char*(TAutoMap<char*>)) {
+        const auto& packed = args[0].AsStringRef();
+        TMemoryInput source(packed.Data(), packed.Size());
+        TZstdDecompress decoder(&source);
+        return valueBuilder->NewString(decoder.ReadAll());
+    }
+
+    // Decompresses through TXzDecompress.
+    SIMPLE_UDF(TXz, char*(TAutoMap<char*>)) {
+        const auto& packed = args[0].AsStringRef();
+        TMemoryInput source(packed.Data(), packed.Size());
+        TXzDecompress decoder(&source);
+        return valueBuilder->NewString(decoder.ReadAll());
+    }
+}
+
+namespace NTryDecompress {
+    // Optional-returning counterparts of the decompression UDFs: each one
+    // returns the decompressed string on success and an empty
+    // TUnboxedValuePod when the decoder throws a std::exception (or, for
+    // Snappy, when snappy::Uncompress returns false).
+
+    // Attempts decompression through TZLibDecompress.
+    SIMPLE_UDF(TGzip, TOptional<char*>(TAutoMap<char*>)) {
+        try {
+            const auto& packed = args[0].AsStringRef();
+            TMemoryInput source(packed.Data(), packed.Size());
+            TZLibDecompress decoder(&source);
+            return valueBuilder->NewString(decoder.ReadAll());
+        } catch (const std::exception&) {
+            return TUnboxedValuePod();
+        }
+    }
+
+    // Attempts decompression through TZLibDecompress.
+    SIMPLE_UDF(TZlib, TOptional<char*>(TAutoMap<char*>)) {
+        try {
+            const auto& packed = args[0].AsStringRef();
+            TMemoryInput source(packed.Data(), packed.Size());
+            TZLibDecompress decoder(&source);
+            return valueBuilder->NewString(decoder.ReadAll());
+        } catch (const std::exception&) {
+            return TUnboxedValuePod();
+        }
+    }
+
+    // Attempts decompression through TBrotliDecompress.
+    SIMPLE_UDF(TBrotli, TOptional<char*>(TAutoMap<char*>)) {
+        try {
+            const auto& packed = args[0].AsStringRef();
+            TMemoryInput source(packed.Data(), packed.Size());
+            TBrotliDecompress decoder(&source);
+            return valueBuilder->NewString(decoder.ReadAll());
+        } catch (const std::exception&) {
+            return TUnboxedValuePod();
+        }
+    }
+
+    // Attempts decompression through TLzmaDecompress.
+    SIMPLE_UDF(TLzma, TOptional<char*>(TAutoMap<char*>)) {
+        try {
+            const auto& packed = args[0].AsStringRef();
+            TMemoryInput source(packed.Data(), packed.Size());
+            TLzmaDecompress decoder(&source);
+            return valueBuilder->NewString(decoder.ReadAll());
+        } catch (const std::exception&) {
+            return TUnboxedValuePod();
+        }
+    }
+
+    // Attempts decompression through TBZipDecompress.
+    SIMPLE_UDF(TBZip2, TOptional<char*>(TAutoMap<char*>)) {
+        try {
+            const auto& packed = args[0].AsStringRef();
+            TMemoryInput source(packed.Data(), packed.Size());
+            TBZipDecompress decoder(&source);
+            return valueBuilder->NewString(decoder.ReadAll());
+        } catch (const std::exception&) {
+            return TUnboxedValuePod();
+        }
+    }
+
+    // Attempts decompression with snappy::Uncompress; there is no try/catch
+    // here, failure is detected from the boolean return value.
+    SIMPLE_UDF(TSnappy, TOptional<char*>(TAutoMap<char*>)) {
+        TString plain;
+        const auto& packed = args[0].AsStringRef();
+        if (!snappy::Uncompress(packed.Data(), packed.Size(), &plain)) {
+            return TUnboxedValuePod();
+        }
+        return valueBuilder->NewString(plain);
+    }
+
+    // Attempts decompression through TZstdDecompress.
+    SIMPLE_UDF(TZstd, TOptional<char*>(TAutoMap<char*>)) {
+        try {
+            const auto& packed = args[0].AsStringRef();
+            TMemoryInput source(packed.Data(), packed.Size());
+            TZstdDecompress decoder(&source);
+            return valueBuilder->NewString(decoder.ReadAll());
+        } catch (const std::exception&) {
+            return TUnboxedValuePod();
+        }
+    }
+
+    // Attempts decompression through TXzDecompress.
+    SIMPLE_UDF(TXz, TOptional<char*>(TAutoMap<char*>)) {
+        try {
+            const auto& packed = args[0].AsStringRef();
+            TMemoryInput source(packed.Data(), packed.Size());
+            TXzDecompress decoder(&source);
+            return valueBuilder->NewString(decoder.ReadAll());
+        } catch (const std::exception&) {
+            return TUnboxedValuePod();
+        }
+    }
+}
+
+// UDF class lists consumed by the SIMPLE_MODULE invocations in
+// ../compress_udf.cpp. The names are intentionally unqualified: each list
+// must be expanded inside the namespace that defines the matching classes
+// (NCompress, NDecompress, NTryDecompress respectively).
+// The compression list has no TXz entry because NCompress defines no TXz;
+// Xz is available for decompression only.
+#define EXPORTED_COMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd
+#define EXPORTED_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz
+#define EXPORTED_TRY_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz
diff --git a/ydb/library/yql/udfs/common/compress_base/lib/ya.make b/ydb/library/yql/udfs/common/compress_base/lib/ya.make
new file mode 100644
index 0000000000..2d3fa510ae
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/lib/ya.make
@@ -0,0 +1,23 @@
+LIBRARY()
+
+YQL_ABI_VERSION(
+ 2
+ 23
+ 0
+)
+
+SRCS(
+ compress_base_udf.cpp
+)
+
+PEERDIR(
+ ydb/library/yql/public/udf
+ contrib/libs/snappy
+ library/cpp/streams/brotli
+ library/cpp/streams/bzip2
+ library/cpp/streams/lzma
+ library/cpp/streams/xz
+ library/cpp/streams/zstd
+)
+
+END()
diff --git a/ydb/library/yql/udfs/common/compress_base/test/canondata/result.json b/ydb/library/yql/udfs/common/compress_base/test/canondata/result.json
new file mode 100644
index 0000000000..5323168beb
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/test/canondata/result.json
@@ -0,0 +1,12 @@
+{
+ "test.test[RoundTrip]": [
+ {
+ "uri": "file://test.test_RoundTrip_/results.txt"
+ }
+ ],
+ "test.test[TryDecompress]": [
+ {
+ "uri": "file://test.test_TryDecompress_/results.txt"
+ }
+ ]
+}
diff --git a/ydb/library/yql/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt b/ydb/library/yql/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt
new file mode 100644
index 0000000000..2c0cefa419
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt
@@ -0,0 +1,124 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "gzip";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "zlib";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "brotli";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "lzma";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bzip2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "zstd";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "snappy";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "H4sIAAAAAAAAAwMAAAAAAAAAAAA="
+ ];
+ "x^\3\0\0\0\0\1";
+ "k\0\3";
+ [
+ "XQAAAAEAg//7///AAAAA"
+ ];
+ [
+ "QlpoNRdyRThQkAAAAAA="
+ ];
+ [
+ "KLUv/SAAAQAA"
+ ];
+ "\0"
+ ];
+ [
+ [
+ "H4sIAAAAAAAAAzMEALfv3IMBAAAA"
+ ];
+ "x^3\4\0\0002\0002";
+ [
+ "CwCAMQM="
+ ];
+ [
+ "XQAAAAEAGMH7////4AAAAA=="
+ ];
+ [
+ "QlpoNTFBWSZTWWEEMGwAAAAIACAAIAAhGEaC7kinChIMIIYNgA=="
+ ];
+ [
+ "KLUv/QBYCQAAMQ=="
+ ];
+ "\1\0001"
+ ];
+ [
+ [
+ "H4sIAAAAAAAAAzM0MjYxNTO3sDQAAOWuHSYKAAAA"
+ ];
+ [
+ "eF4zNDI2MTUzt7A0AAALLAIO"
+ ];
+ [
+ "iwSAMTIzNDU2Nzg5MAM="
+ ];
+ [
+ "XQAAAAEAGIyCtsQRNFxO4dpOCbf//KPgAA=="
+ ];
+ [
+ "QlpoNTFBWSZTWVBoU7YAAACIAH/gIAAiAaaYQAwVXmjj6Yu5IpwoSCg0KdsA"
+ ];
+ [
+ "KLUv/QBYUQAAMTIzNDU2Nzg5MA=="
+ ];
+ "\n$1234567890"
+ ]
+ ]
+ }
+ ]
+ }
+]
\ No newline at end of file
diff --git a/ydb/library/yql/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt b/ydb/library/yql/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt
new file mode 100644
index 0000000000..649a6670a9
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt
@@ -0,0 +1,188 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "ok_Gzip";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Gzip";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_Zlib";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Zlib";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_Brotli";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Brotli";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_Lzma";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Lzma";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_BZip2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_BZip2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_Snappy";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Snappy";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_Zstd";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Zstd";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+]
\ No newline at end of file
diff --git a/ydb/library/yql/udfs/common/compress_base/test/cases/RoundTrip.sql b/ydb/library/yql/udfs/common/compress_base/test/cases/RoundTrip.sql
new file mode 100644
index 0000000000..4c8eba4aab
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/test/cases/RoundTrip.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+-- Round-trip check: for every row of Input, `value` is compressed by each
+-- codec and the compressed bytes are returned as that codec's column.
+-- Ensure(x, cond, msg) yields x when cond holds and fails the query with msg
+-- otherwise, so each column also verifies that decompressing the compressed
+-- bytes gives back `value`.
+-- Compression level passed to every codec except Snappy, which takes none.
+$level = 5;
+
+SELECT
+ Ensure(Compress::Gzip(value, $level), Decompress::Gzip(Compress::Gzip(value, $level)) == value, "gzip failed at: " || value) AS gzip,
+ Ensure(Compress::Zlib(value, $level), Decompress::Zlib(Compress::Zlib(value, $level)) == value, "zlib failed at: " || value) AS zlib,
+ Ensure(Compress::Brotli(value, $level), Decompress::Brotli(Compress::Brotli(value, $level)) == value, "brotli failed at: " || value) AS brotli,
+ Ensure(Compress::Lzma(value, $level), Decompress::Lzma(Compress::Lzma(value, $level)) == value, "lzma failed at: " || value) AS lzma,
+ Ensure(Compress::BZip2(value, $level), Decompress::BZip2(Compress::BZip2(value, $level)) == value, "bzip2 failed at: " || value) AS bzip2,
+ Ensure(Compress::Zstd(value, $level), Decompress::Zstd(Compress::Zstd(value, $level)) == value, "zstd failed at: " || value) AS zstd,
+ Ensure(Compress::Snappy(value), Decompress::Snappy(Compress::Snappy(value)) == value, "Snappy failed at: " || value) AS snappy,
+FROM Input;
diff --git a/ydb/library/yql/udfs/common/compress_base/test/cases/TryDecompress.sql b/ydb/library/yql/udfs/common/compress_base/test/cases/TryDecompress.sql
new file mode 100644
index 0000000000..a3e612ab6d
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/test/cases/TryDecompress.sql
@@ -0,0 +1,19 @@
+/* syntax version 1 */
+-- Two columns are produced for every codec exercised here:
+--   ok_<Codec>  - TryDecompress applied to freshly compressed $bad, compared with $bad;
+--   bad_<Codec> - TryDecompress applied to the uncompressed literal itself
+--                 (the canonized result holds `#` for these columns).
+-- NOTE(review): TryDecompress::Xz is not covered by this case.
+$bad = "Is not compressed!";
+
+SELECT
+ TryDecompress::Gzip(Compress::Gzip($bad, 3)) = $bad AS ok_Gzip,
+ TryDecompress::Gzip($bad) AS bad_Gzip,
+ TryDecompress::Zlib(Compress::Zlib($bad, 3)) = $bad AS ok_Zlib,
+ TryDecompress::Zlib($bad) AS bad_Zlib,
+ TryDecompress::Brotli(Compress::Brotli($bad, 3)) = $bad AS ok_Brotli,
+ TryDecompress::Brotli($bad) AS bad_Brotli,
+ TryDecompress::Lzma(Compress::Lzma($bad, 3)) = $bad AS ok_Lzma,
+ TryDecompress::Lzma($bad) AS bad_Lzma,
+ TryDecompress::BZip2(Compress::BZip2($bad, 3)) = $bad AS ok_BZip2,
+ TryDecompress::BZip2($bad) AS bad_BZip2,
+ TryDecompress::Snappy(Compress::Snappy($bad)) = $bad AS ok_Snappy,
+ TryDecompress::Snappy($bad) AS bad_Snappy,
+ TryDecompress::Zstd(Compress::Zstd($bad, 3)) = $bad AS ok_Zstd,
+ TryDecompress::Zstd($bad) AS bad_Zstd;
+
diff --git a/ydb/library/yql/udfs/common/compress_base/test/cases/default.in b/ydb/library/yql/udfs/common/compress_base/test/cases/default.in
new file mode 100644
index 0000000000..8fee3ddb78
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/test/cases/default.in
@@ -0,0 +1,3 @@
+{"key"="1";"subkey"="2";"value"=""};
+{"key"="2";"subkey"="2";"value"="1"};
+{"key"="3";"subkey"="3";"value"="1234567890"};
diff --git a/ydb/library/yql/udfs/common/compress_base/test/ya.make b/ydb/library/yql/udfs/common/compress_base/test/ya.make
new file mode 100644
index 0000000000..7178119919
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/test/ya.make
@@ -0,0 +1,11 @@
+YQL_UDF_YDB_TEST()
+
+DEPENDS(ydb/library/yql/udfs/common/compress_base)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+SIZE(MEDIUM)
+
+END()
diff --git a/ydb/library/yql/udfs/common/compress_base/ya.make b/ydb/library/yql/udfs/common/compress_base/ya.make
new file mode 100644
index 0000000000..b43528d3fc
--- /dev/null
+++ b/ydb/library/yql/udfs/common/compress_base/ya.make
@@ -0,0 +1,22 @@
+YQL_UDF_YDB(compress_udf)
+
+YQL_ABI_VERSION(
+ 2
+ 23
+ 0
+)
+
+SRCS(
+ compress_udf.cpp
+)
+
+PEERDIR(
+ ydb/library/yql/public/udf
+ ydb/library/yql/udfs/common/compress_base/lib
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/ydb/library/yql/udfs/common/ya.make b/ydb/library/yql/udfs/common/ya.make
index 2b37fd12d8..b1c226bc20 100644
--- a/ydb/library/yql/udfs/common/ya.make
+++ b/ydb/library/yql/udfs/common/ya.make
@@ -1,5 +1,6 @@
RECURSE(
clickhouse/client
+ compress_base
datetime
datetime2
digest