about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vasily Gerasimov <UgnineSirdis@ydb.tech> 2025-04-24 15:24:31 +0300
committer GitHub <noreply@github.com> 2025-04-24 15:24:31 +0300
commit 7ee40404ec03b89f39468de0b342d71eefea82ab (patch)
tree 77bb980c3b6df131edb8c67d64ce0aec62d78f66
parent 08dc695276aa3af8fbbf043e4c3daee326dde145 (diff)
download ydb-7ee40404ec03b89f39468de0b342d71eefea82ab.tar.gz
Support zero data encrypted files in import (#17620)
-rw-r--r-- ydb/core/tx/datashard/import_s3.cpp 27
-rw-r--r-- ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp 324
2 files changed, 231 insertions, 120 deletions
diff --git a/ydb/core/tx/datashard/import_s3.cpp b/ydb/core/tx/datashard/import_s3.cpp
index 035e5230baa..6ac247a332c 100644
--- a/ydb/core/tx/datashard/import_s3.cpp
+++ b/ydb/core/tx/datashard/import_s3.cpp
@@ -314,10 +314,12 @@ class TS3Downloader: public TActorBootstrapped<TS3Downloader> {
// Each block contains at least one row of data with '\n',
// so we will always get some data from DataController.
ReadyInputBytes += processedAfter - processedBefore;
- DataController->Feed(TString(block->Data(), block->Size()), Last);
- const EDataStatus status = DataController->TryGetData(data, error);
- Y_ENSURE(status == READY_DATA);
- return status;
+ if (block->Size()) {
+ DataController->Feed(TString(block->Data(), block->Size()), Last);
+ const EDataStatus status = DataController->TryGetData(data, error);
+ Y_ENSURE(status == READY_DATA);
+ }
+ return READY_DATA;
} else {
return NOT_ENOUGH_DATA;
}
@@ -519,6 +521,13 @@ class TS3Downloader: public TActorBootstrapped<TS3Downloader> {
ETag = result.GetResult().GetETag();
ContentLength = result.GetResult().GetContentLength();
+ if (!ContentLength && Settings.EncryptionSettings.EncryptedBackup) {
+ // Encrypted file can not have zero length
+ const TString error = "File is corrupted";
+ IMPORT_LOG_E(error);
+ return Finish(false, error);
+ }
+
if (Checksum) {
HeadObject(ChecksumKey(Settings.GetDataKey(DataFormat, ECompressionCodec::None)));
Become(&TThis::StateDownloadChecksum);
@@ -664,8 +673,14 @@ class TS3Downloader: public TActorBootstrapped<TS3Downloader> {
}
RequestBuilder.New(TableInfo, Scheme);
- TMemoryPool pool(256);
- while (ProcessData(data, pool));
+
+ // Special case:
+ // in encrypted file we have nonzero bytes on input, but can still have zero bytes on output
+ // In this case TryGetData() returns READY_DATA
+ if (data) {
+ TMemoryPool pool(256);
+ while (ProcessData(data, pool));
+ }
if (const auto processed = Reader->ReadyBytes()) { // has progress
ProcessedBytes += processed;
diff --git a/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp b/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp
index 0693679da59..53b84266e07 100644
--- a/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp
+++ b/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp
@@ -1564,7 +1564,7 @@ value {
}
}
- void ExportImportOnSupportedDatatypesImpl(bool encrypted, bool commonPrefix) {
+ void ExportImportOnSupportedDatatypesImpl(bool encrypted, bool commonPrefix, bool emptyTable = false) {
TTestBasicRuntime runtime;
TTestEnv env(runtime, TTestEnvOptions().EnableParameterizedDecimal(true));
runtime.GetAppData().FeatureFlags.SetEnableEncryptedExport(true);
@@ -1601,55 +1601,57 @@ value {
)_");
env.TestWaitNotification(runtime, txId);
- const int partitionIdx = 0;
-
- const TVector<TCell> keys = {TCell::Make(1ull)};
-
- const TString string = "test string";
- const TString json = R"({"key": "value"})";
- auto binaryJson = NBinaryJson::SerializeToBinaryJson(json);
- Y_ABORT_UNLESS(std::holds_alternative<NBinaryJson::TBinaryJson>(binaryJson));
- const auto& binaryJsonValue = std::get<NBinaryJson::TBinaryJson>(binaryJson);
-
- const std::pair<ui64, ui64> decimal = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("16.17", NScheme::DECIMAL_PRECISION, NScheme::DECIMAL_SCALE));
- const std::pair<ui64, ui64> decimal35 = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("555555555555555.123456789", 35, 10));
- const TString dynumber = *NDyNumber::ParseDyNumberString("18");
-
- char uuid[16];
- NUuid::ParseUuidToArray(TString("65df1ec1-a97d-47b2-ae56-3c023da6ee8c"), reinterpret_cast<ui16*>(uuid), false);
-
- const TVector<TCell> values = {
- TCell::Make<i32>(-1), // Int32
- TCell::Make<ui32>(2), // Uint32
- TCell::Make<i64>(-3), // Int64
- TCell::Make<ui64>(4), // Uint64
- TCell::Make<ui8>(5), // Uint8
- TCell::Make<bool>(true), // Bool
- TCell::Make<double>(6.66), // Double
- TCell::Make<float>(7.77), // Float
- TCell::Make<ui16>(8), // Date
- TCell::Make<ui32>(9), // Datetime
- TCell::Make<ui64>(10), // Timestamp
- TCell::Make<i64>(-11), // Interval
- TCell::Make<i32>(-12), // Date32
- TCell::Make<i64>(-13), // Datetime64
- TCell::Make<i64>(-14), // Timestamp64
- TCell::Make<i64>(-15), // Interval64
- TCell::Make<std::pair<ui64, ui64>>(decimal), // Decimal
- TCell::Make<std::pair<ui64, ui64>>(decimal35), // Decimal
- TCell(dynumber.data(), dynumber.size()), // Dynumber
- TCell(string.data(), string.size()), // String
- TCell(string.data(), string.size()), // Utf8
- TCell(json.data(), json.size()), // Json
- TCell(binaryJsonValue.Data(), binaryJsonValue.Size()), // JsonDocument
- TCell(uuid, sizeof(uuid)), // Uuid
- };
-
- const TVector<ui32> keyTags = {1};
- TVector<ui32> valueTags(values.size());
- std::iota(valueTags.begin(), valueTags.end(), 2);
-
- UploadRow(runtime, "/MyRoot/Table", partitionIdx, keyTags, valueTags, keys, values);
+ if (!emptyTable) {
+ const int partitionIdx = 0;
+
+ const TVector<TCell> keys = {TCell::Make(1ull)};
+
+ const TString string = "test string";
+ const TString json = R"({"key": "value"})";
+ auto binaryJson = NBinaryJson::SerializeToBinaryJson(json);
+ Y_ABORT_UNLESS(std::holds_alternative<NBinaryJson::TBinaryJson>(binaryJson));
+ const auto& binaryJsonValue = std::get<NBinaryJson::TBinaryJson>(binaryJson);
+
+ const std::pair<ui64, ui64> decimal = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("16.17", NScheme::DECIMAL_PRECISION, NScheme::DECIMAL_SCALE));
+ const std::pair<ui64, ui64> decimal35 = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("555555555555555.123456789", 35, 10));
+ const TString dynumber = *NDyNumber::ParseDyNumberString("18");
+
+ char uuid[16];
+ NUuid::ParseUuidToArray(TString("65df1ec1-a97d-47b2-ae56-3c023da6ee8c"), reinterpret_cast<ui16*>(uuid), false);
+
+ const TVector<TCell> values = {
+ TCell::Make<i32>(-1), // Int32
+ TCell::Make<ui32>(2), // Uint32
+ TCell::Make<i64>(-3), // Int64
+ TCell::Make<ui64>(4), // Uint64
+ TCell::Make<ui8>(5), // Uint8
+ TCell::Make<bool>(true), // Bool
+ TCell::Make<double>(6.66), // Double
+ TCell::Make<float>(7.77), // Float
+ TCell::Make<ui16>(8), // Date
+ TCell::Make<ui32>(9), // Datetime
+ TCell::Make<ui64>(10), // Timestamp
+ TCell::Make<i64>(-11), // Interval
+ TCell::Make<i32>(-12), // Date32
+ TCell::Make<i64>(-13), // Datetime64
+ TCell::Make<i64>(-14), // Timestamp64
+ TCell::Make<i64>(-15), // Interval64
+ TCell::Make<std::pair<ui64, ui64>>(decimal), // Decimal
+ TCell::Make<std::pair<ui64, ui64>>(decimal35), // Decimal
+ TCell(dynumber.data(), dynumber.size()), // Dynumber
+ TCell(string.data(), string.size()), // String
+ TCell(string.data(), string.size()), // Utf8
+ TCell(json.data(), json.size()), // Json
+ TCell(binaryJsonValue.Data(), binaryJsonValue.Size()), // JsonDocument
+ TCell(uuid, sizeof(uuid)), // Uuid
+ };
+
+ const TVector<ui32> keyTags = {1};
+ TVector<ui32> valueTags(values.size());
+ std::iota(valueTags.begin(), valueTags.end(), 2);
+
+ UploadRow(runtime, "/MyRoot/Table", partitionIdx, keyTags, valueTags, keys, values);
+ }
TPortManager portManager;
const ui16 port = portManager.GetPort();
@@ -1716,70 +1718,71 @@ value {
env.TestWaitNotification(runtime, txId);
TestGetImport(runtime, txId, "/MyRoot");
-
- TString expectedJson = TStringBuilder() << "[[[[["
- << "[%true];" // bool
- << "[\"" << -12 << "\"];" // date32
- << "[\"" << 8 << "\"];" // date
- << "[\"" << -13 << "\"];" // datetime64
- << "[\"" << 9 << "\"];" // datetime
- << "[\"" << "555555555555555.123456789" << "\"];" // decimal35
- << "[\"" << "16.17" << "\"];" // decimal
- << "[\"" << 6.66 << "\"];" // double
- << "[\"" << ".18e2" << "\"];" // dynumber
- << "[\"" << 7.77f << "\"];" // float
- << "[\"" << -1 << "\"];" // int32
- << "[\"" << -3 << "\"];" // int64
- << "[\"" << -15 << "\"];" // interval64
- << "[\"" << -11 << "\"];" // interval
- << "[\"" << "{\\\"key\\\": \\\"value\\\"}" << "\"];" // json
- << "[\"" << "{\\\"key\\\":\\\"value\\\"}" << "\"];" // jsondoc
- << "[\"" << 1 << "\"];" // key
- << "[\"" << "test string" << "\"];" // string
- << "[\"" << -14 << "\"];" // timestamp64
- << "[\"" << 10 << "\"];" // timestamp
- << "[\"" << 2 << "\"];" // uint32
- << "[\"" << 4 << "\"];" // uint64
- << "[\"" << 5 << "\"];" // uint8
- << "[\"" << "test string" << "\"];" // utf8
- << "[[\"" << "wR7fZX2pskeuVjwCPabujA==" << "\"]]" // uuid
- << "]];\%false]]]";
-
- const TReadKeyDesc readKeyDesc = {"key", "Uint64", "0"};
-
- const TVector<TString> readColumns = {
- "key",
- "int32_value",
- "uint32_value",
- "int64_value",
- "uint64_value",
- "uint8_value",
- "bool_value",
- "double_value",
- "float_value",
- "date_value",
- "datetime_value",
- "timestamp_value",
- "interval_value",
- "date32_value",
- "datetime64_value",
- "timestamp64_value",
- "interval64_value",
- "decimal_value",
- "decimal35_value",
- "dynumber_value",
- "string_value",
- "utf8_value",
- "json_value",
- "jsondoc_value",
- "uuid_value",
- };
-
- auto contentOriginalTable = ReadTable(runtime, TTestTxConfig::FakeHiveTablets, "Table", readKeyDesc, readColumns);
- NKqp::CompareYson(expectedJson, contentOriginalTable);
-
- auto contentRestoredTable = ReadTable(runtime, TTestTxConfig::FakeHiveTablets + 2, commonPrefix ? "Table" : "Restored", readKeyDesc, readColumns);
- NKqp::CompareYson(expectedJson, contentRestoredTable);
+ if (!emptyTable) {
+ TString expectedJson = TStringBuilder() << "[[[[["
+ << "[%true];" // bool
+ << "[\"" << -12 << "\"];" // date32
+ << "[\"" << 8 << "\"];" // date
+ << "[\"" << -13 << "\"];" // datetime64
+ << "[\"" << 9 << "\"];" // datetime
+ << "[\"" << "555555555555555.123456789" << "\"];" // decimal35
+ << "[\"" << "16.17" << "\"];" // decimal
+ << "[\"" << 6.66 << "\"];" // double
+ << "[\"" << ".18e2" << "\"];" // dynumber
+ << "[\"" << 7.77f << "\"];" // float
+ << "[\"" << -1 << "\"];" // int32
+ << "[\"" << -3 << "\"];" // int64
+ << "[\"" << -15 << "\"];" // interval64
+ << "[\"" << -11 << "\"];" // interval
+ << "[\"" << "{\\\"key\\\": \\\"value\\\"}" << "\"];" // json
+ << "[\"" << "{\\\"key\\\":\\\"value\\\"}" << "\"];" // jsondoc
+ << "[\"" << 1 << "\"];" // key
+ << "[\"" << "test string" << "\"];" // string
+ << "[\"" << -14 << "\"];" // timestamp64
+ << "[\"" << 10 << "\"];" // timestamp
+ << "[\"" << 2 << "\"];" // uint32
+ << "[\"" << 4 << "\"];" // uint64
+ << "[\"" << 5 << "\"];" // uint8
+ << "[\"" << "test string" << "\"];" // utf8
+ << "[[\"" << "wR7fZX2pskeuVjwCPabujA==" << "\"]]" // uuid
+ << "]];\%false]]]";
+
+ const TReadKeyDesc readKeyDesc = {"key", "Uint64", "0"};
+
+ const TVector<TString> readColumns = {
+ "key",
+ "int32_value",
+ "uint32_value",
+ "int64_value",
+ "uint64_value",
+ "uint8_value",
+ "bool_value",
+ "double_value",
+ "float_value",
+ "date_value",
+ "datetime_value",
+ "timestamp_value",
+ "interval_value",
+ "date32_value",
+ "datetime64_value",
+ "timestamp64_value",
+ "interval64_value",
+ "decimal_value",
+ "decimal35_value",
+ "dynumber_value",
+ "string_value",
+ "utf8_value",
+ "json_value",
+ "jsondoc_value",
+ "uuid_value",
+ };
+
+ auto contentOriginalTable = ReadTable(runtime, TTestTxConfig::FakeHiveTablets, "Table", readKeyDesc, readColumns);
+ NKqp::CompareYson(expectedJson, contentOriginalTable);
+
+ auto contentRestoredTable = ReadTable(runtime, TTestTxConfig::FakeHiveTablets + 2, commonPrefix ? "Table" : "Restored", readKeyDesc, readColumns);
+ NKqp::CompareYson(expectedJson, contentRestoredTable);
+ }
}
Y_UNIT_TEST(ExportImportOnSupportedDatatypes) {
@@ -1794,6 +1797,99 @@ value {
ExportImportOnSupportedDatatypesImpl(true, true);
}
+ Y_UNIT_TEST(ExportImportOnSupportedDatatypesEncryptedNoData) {
+ ExportImportOnSupportedDatatypesImpl(true, true, true); // encrypted, commonPrefix, emptyTable
+ }
+
+ Y_UNIT_TEST(ZeroLengthEncryptedFileTreatedAsCorrupted) { // emptying any encrypted backup file must cancel the import
+ TTestBasicRuntime runtime;
+ TTestEnv env(runtime, TTestEnvOptions().EnableParameterizedDecimal(true));
+ runtime.GetAppData().FeatureFlags.SetEnableEncryptedExport(true); // the export below uses encryption_settings
+ ui64 txId = 100;
+
+ TestCreateTable(runtime, ++txId, "/MyRoot", R"_(
+ Name: "Table"
+ Columns { Name: "key" Type: "Uint64" }
+ Columns { Name: "value" Type: "String" }
+ KeyColumnNames: ["key"]
+ )_");
+ env.TestWaitNotification(runtime, txId);
+
+ TPortManager portManager;
+ const ui16 port = portManager.GetPort();
+
+ TS3Mock s3Mock({}, TS3Mock::TSettings(port)); // starts with no stored objects; the export below fills it
+ UNIT_ASSERT(s3Mock.Start());
+
+ TestExport(runtime, ++txId, "/MyRoot", Sprintf(R"(
+ ExportToS3Settings {
+ endpoint: "localhost:%d"
+ scheme: HTTP
+ source_path: "/MyRoot"
+ destination_prefix: "BackupPrefix"
+ items {
+ source_path: "Table"
+ }
+ encryption_settings {
+ encryption_algorithm: "ChaCha20-Poly1305"
+ symmetric_key {
+ key: "Very very secret export key!!!!!"
+ }
+ }
+ }
+ )", port));
+ env.TestWaitNotification(runtime, txId);
+ TestGetExport(runtime, txId, "/MyRoot");
+
+ // Baseline: the untouched encrypted backup imports successfully
+ TestImport(runtime, ++txId, "/MyRoot", Sprintf(R"(
+ ImportFromS3Settings {
+ endpoint: "localhost:%d"
+ scheme: HTTP
+ source_prefix: "BackupPrefix"
+ destination_path: "/MyRoot/Restored"
+ encryption_settings {
+ symmetric_key {
+ key: "Very very secret export key!!!!!"
+ }
+ }
+ }
+ )", port));
+ env.TestWaitNotification(runtime, txId);
+ TestGetImport(runtime, txId, "/MyRoot");
+
+ // Empties the given backup file, checks that the import gets cancelled, then restores the file content
+ auto checkFailsIfFileIsEmpty = [&](const TString& fileName) {
+ TString& data = s3Mock.GetData()[fileName]; // reference into the mock's storage, modified in place below
+ UNIT_ASSERT(!data.empty()); // the check is only meaningful for a file that currently has content
+ TString srcData = data; // keep a copy to put back after the check
+ data.clear(); // turn the stored object into a zero-length file
+
+ TestImport(runtime, ++txId, "/MyRoot", Sprintf(R"(
+ ImportFromS3Settings {
+ endpoint: "localhost:%d"
+ scheme: HTTP
+ source_prefix: "BackupPrefix"
+ destination_path: "/MyRoot/Restored2"
+ encryption_settings {
+ symmetric_key {
+ key: "Very very secret export key!!!!!"
+ }
+ }
+ }
+ )", port));
+ env.TestWaitNotification(runtime, txId);
+ TestGetImport(runtime, txId, "/MyRoot", Ydb::StatusIds::CANCELLED); // expected outcome of the import: CANCELLED
+
+ data = srcData; // put the original content back for the next check
+ }; // NOTE(review): every check reuses destination /MyRoot/Restored2 - confirm a cancelled import leaves nothing behind that could fail a later one for a different reason
+
+ checkFailsIfFileIsEmpty("/BackupPrefix/SchemaMapping/metadata.json.enc");
+ checkFailsIfFileIsEmpty("/BackupPrefix/SchemaMapping/mapping.json.enc");
+ checkFailsIfFileIsEmpty("/BackupPrefix/001/data_00.csv.enc");
+ checkFailsIfFileIsEmpty("/BackupPrefix/001/metadata.json.enc");
+ }
+
Y_UNIT_TEST(ExportImportPg) {
TTestBasicRuntime runtime;
TTestEnv env(runtime, TTestEnvOptions().EnableTablePgTypes(true));