about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author akozhikhov <akozhikhov@yandex-team.com> 2024-11-25 11:57:57 +0300
committer akozhikhov <akozhikhov@yandex-team.com> 2024-11-25 12:11:48 +0300
commit d790e89c4f99ddcc80ac630927e3c153ef6bad90 (patch)
tree 7abc2a8f19bb9bc3fb8e916c9f7f0fc8faad368d
parent 0d292422268782f0712088fc7aefdc71f4194789 (diff)
download ydb-d790e89c4f99ddcc80ac630927e3c153ef6bad90.tar.gz
YT-23398: More robust checks in dictionary compression
commit_hash:7cf2c37d325706e6180cee7e15e7193f760f3716
-rw-r--r-- yt/yt/client/table_client/config.h 4
-rw-r--r-- yt/yt/core/compression/dictionary_codec.h 4
-rw-r--r-- yt/yt/core/compression/zstd.cpp 17
3 files changed, 19 insertions, 6 deletions
diff --git a/yt/yt/client/table_client/config.h b/yt/yt/client/table_client/config.h
index 2f91bcd032..bdb92d90be 100644
--- a/yt/yt/client/table_client/config.h
+++ b/yt/yt/client/table_client/config.h
@@ -249,10 +249,6 @@ public:
//! Recommended to be ~100 times less than weight of samples for that column.
i64 ColumnDictionarySize;
- //! Level of compression algorithm.
- //! Applied to digested compression dictionary upon its construction.
- int CompressionLevel;
-
//! Subset of all dictionary building policies.
//! Will build and apply dictionaries only from this subset.
//! Upon each chunk compression will independently decide which dictionary fits best.
diff --git a/yt/yt/core/compression/dictionary_codec.h b/yt/yt/core/compression/dictionary_codec.h
index a394d452e6..fc36b4812d 100644
--- a/yt/yt/core/compression/dictionary_codec.h
+++ b/yt/yt/core/compression/dictionary_codec.h
@@ -95,8 +95,9 @@ struct IDictionaryCompressionCodec
virtual IDictionaryDecompressorPtr CreateDictionaryDecompressor(
const IDigestedDecompressionDictionaryPtr& digestedDecompressionDictionary) const = 0;
- // NB: Raw #compressionDictionary data will be copied and stored within digested dictionary in a preprocessed form.
+ //! NB: Raw #compressionDictionary data will be copied and stored within digested dictionary in a preprocessed form.
//! #compressionLevel determines compression level that will be applied for each compression with that dictionary later on.
+ //! These methods may throw.
virtual IDigestedCompressionDictionaryPtr CreateDigestedCompressionDictionary(
const TSharedRef& compressionDictionary,
int compressionLevel) const = 0;
@@ -104,6 +105,7 @@ struct IDictionaryCompressionCodec
const TSharedRef& compressionDictionary) const = 0;
//! Parses header of compressed frame #input and returns specified frame info.
+ //! This method may throw.
virtual TDictionaryCompressionFrameInfo GetFrameInfo(TRef input) const = 0;
};
diff --git a/yt/yt/core/compression/zstd.cpp b/yt/yt/core/compression/zstd.cpp
index 5d64d69f80..8772de735b 100644
--- a/yt/yt/core/compression/zstd.cpp
+++ b/yt/yt/core/compression/zstd.cpp
@@ -220,7 +220,12 @@ TDictionaryCompressionFrameInfo ZstdGetFrameInfo(TRef input)
input.Begin(),
input.Size(),
ZSTD_f_zstd1_magicless);
- YT_VERIFY(result == 0);
+ if (result != 0) {
+ THROW_ERROR_EXCEPTION("Failed to get frame header")
+ << TErrorAttribute("code", result)
+ << TErrorAttribute("is_error", ZSTD_isError(result))
+ << TErrorAttribute("error", ZSTD_getErrorName(result));
+ }
return {
.ContentSize = frameHeader.frameContentSize,
@@ -488,11 +493,17 @@ IDigestedCompressionDictionaryPtr ZstdCreateDigestedCompressionDictionary(
int compressionLevel)
{
YT_VERIFY(compressionDictionary);
+ YT_VERIFY(compressionLevel >= 0 && compressionLevel <= ZstdGetMaxCompressionLevel());
auto* digestedDictionary = ZSTD_createCDict(
compressionDictionary.Begin(),
compressionDictionary.Size(),
compressionLevel);
+ if (!digestedDictionary) {
+ THROW_ERROR_EXCEPTION("Failed to create digested compression dictionary")
+ << TErrorAttribute("compression_level", compressionLevel)
+ << TErrorAttribute("dictionary_size", compressionDictionary.Size());
+ }
return New<TDigestedCompressionDictionary>(digestedDictionary);
}
@@ -504,6 +515,10 @@ IDigestedDecompressionDictionaryPtr ZstdCreateDigestedDecompressionDictionary(
auto* digestedDictionary = ZSTD_createDDict(
compressionDictionary.Begin(),
compressionDictionary.Size());
+ if (!digestedDictionary) {
+ THROW_ERROR_EXCEPTION("Failed to create digested decompression dictionary")
+ << TErrorAttribute("dictionary_size", compressionDictionary.Size());
+ }
return New<TDigestedDecompressionDictionary>(digestedDictionary);
}