diff options
author | Vasily Gerasimov <UgnineSirdis@ydb.tech> | 2025-04-24 15:24:31 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-24 15:24:31 +0300 |
commit | 7ee40404ec03b89f39468de0b342d71eefea82ab (patch) | |
tree | 77bb980c3b6df131edb8c67d64ce0aec62d78f66 | |
parent | 08dc695276aa3af8fbbf043e4c3daee326dde145 (diff) | |
download | ydb-7ee40404ec03b89f39468de0b342d71eefea82ab.tar.gz |
Support zero data encrypted files in import (#17620)
-rw-r--r-- | ydb/core/tx/datashard/import_s3.cpp | 27 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp | 324 |
2 files changed, 231 insertions, 120 deletions
diff --git a/ydb/core/tx/datashard/import_s3.cpp b/ydb/core/tx/datashard/import_s3.cpp index 035e5230baa..6ac247a332c 100644 --- a/ydb/core/tx/datashard/import_s3.cpp +++ b/ydb/core/tx/datashard/import_s3.cpp @@ -314,10 +314,12 @@ class TS3Downloader: public TActorBootstrapped<TS3Downloader> { // Each block contains at least one row of data with '\n', // so we will always get some data from DataController. ReadyInputBytes += processedAfter - processedBefore; - DataController->Feed(TString(block->Data(), block->Size()), Last); - const EDataStatus status = DataController->TryGetData(data, error); - Y_ENSURE(status == READY_DATA); - return status; + if (block->Size()) { + DataController->Feed(TString(block->Data(), block->Size()), Last); + const EDataStatus status = DataController->TryGetData(data, error); + Y_ENSURE(status == READY_DATA); + } + return READY_DATA; } else { return NOT_ENOUGH_DATA; } @@ -519,6 +521,13 @@ class TS3Downloader: public TActorBootstrapped<TS3Downloader> { ETag = result.GetResult().GetETag(); ContentLength = result.GetResult().GetContentLength(); + if (!ContentLength && Settings.EncryptionSettings.EncryptedBackup) { + // Encrypted file can not have zero length + const TString error = "File is corrupted"; + IMPORT_LOG_E(error); + return Finish(false, error); + } + if (Checksum) { HeadObject(ChecksumKey(Settings.GetDataKey(DataFormat, ECompressionCodec::None))); Become(&TThis::StateDownloadChecksum); @@ -664,8 +673,14 @@ class TS3Downloader: public TActorBootstrapped<TS3Downloader> { } RequestBuilder.New(TableInfo, Scheme); - TMemoryPool pool(256); - while (ProcessData(data, pool)); + + // Special case: + // in encrypted file we have nonzero bytes on input, but can still have zero bytes on output + // In this case TryGetData() returns READY_DATA + if (data) { + TMemoryPool pool(256); + while (ProcessData(data, pool)); + } if (const auto processed = Reader->ReadyBytes()) { // has progress ProcessedBytes += processed; diff --git a/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp b/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp index 0693679da59..53b84266e07 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp +++ b/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp @@ -1564,7 +1564,7 @@ value { } } - void ExportImportOnSupportedDatatypesImpl(bool encrypted, bool commonPrefix) { + void ExportImportOnSupportedDatatypesImpl(bool encrypted, bool commonPrefix, bool emptyTable = false) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableParameterizedDecimal(true)); runtime.GetAppData().FeatureFlags.SetEnableEncryptedExport(true); @@ -1601,55 +1601,57 @@ value { )_"); env.TestWaitNotification(runtime, txId); - const int partitionIdx = 0; - - const TVector<TCell> keys = {TCell::Make(1ull)}; - - const TString string = "test string"; - const TString json = R"({"key": "value"})"; - auto binaryJson = NBinaryJson::SerializeToBinaryJson(json); - Y_ABORT_UNLESS(std::holds_alternative<NBinaryJson::TBinaryJson>(binaryJson)); - const auto& binaryJsonValue = std::get<NBinaryJson::TBinaryJson>(binaryJson); - - const std::pair<ui64, ui64> decimal = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("16.17", NScheme::DECIMAL_PRECISION, NScheme::DECIMAL_SCALE)); - const std::pair<ui64, ui64> decimal35 = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("555555555555555.123456789", 35, 10)); - const TString dynumber = *NDyNumber::ParseDyNumberString("18"); - - char uuid[16]; - NUuid::ParseUuidToArray(TString("65df1ec1-a97d-47b2-ae56-3c023da6ee8c"), reinterpret_cast<ui16*>(uuid), false); - - const TVector<TCell> values = { - TCell::Make<i32>(-1), // Int32 - TCell::Make<ui32>(2), // Uint32 - TCell::Make<i64>(-3), // Int64 - TCell::Make<ui64>(4), // Uint64 - TCell::Make<ui8>(5), // Uint8 - TCell::Make<bool>(true), // Bool - TCell::Make<double>(6.66), // Double - TCell::Make<float>(7.77), // Float - TCell::Make<ui16>(8), // Date - TCell::Make<ui32>(9), // Datetime - TCell::Make<ui64>(10), // Timestamp - TCell::Make<i64>(-11), // Interval - TCell::Make<i32>(-12), // Date32 - TCell::Make<i64>(-13), // Datetime64 - TCell::Make<i64>(-14), // Timestamp64 - TCell::Make<i64>(-15), // Interval64 - TCell::Make<std::pair<ui64, ui64>>(decimal), // Decimal - TCell::Make<std::pair<ui64, ui64>>(decimal35), // Decimal - TCell(dynumber.data(), dynumber.size()), // Dynumber - TCell(string.data(), string.size()), // String - TCell(string.data(), string.size()), // Utf8 - TCell(json.data(), json.size()), // Json - TCell(binaryJsonValue.Data(), binaryJsonValue.Size()), // JsonDocument - TCell(uuid, sizeof(uuid)), // Uuid - }; - - const TVector<ui32> keyTags = {1}; - TVector<ui32> valueTags(values.size()); - std::iota(valueTags.begin(), valueTags.end(), 2); - - UploadRow(runtime, "/MyRoot/Table", partitionIdx, keyTags, valueTags, keys, values); + if (!emptyTable) { + const int partitionIdx = 0; + + const TVector<TCell> keys = {TCell::Make(1ull)}; + + const TString string = "test string"; + const TString json = R"({"key": "value"})"; + auto binaryJson = NBinaryJson::SerializeToBinaryJson(json); + Y_ABORT_UNLESS(std::holds_alternative<NBinaryJson::TBinaryJson>(binaryJson)); + const auto& binaryJsonValue = std::get<NBinaryJson::TBinaryJson>(binaryJson); + + const std::pair<ui64, ui64> decimal = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("16.17", NScheme::DECIMAL_PRECISION, NScheme::DECIMAL_SCALE)); + const std::pair<ui64, ui64> decimal35 = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("555555555555555.123456789", 35, 10)); + const TString dynumber = *NDyNumber::ParseDyNumberString("18"); + + char uuid[16]; + NUuid::ParseUuidToArray(TString("65df1ec1-a97d-47b2-ae56-3c023da6ee8c"), reinterpret_cast<ui16*>(uuid), false); + + const TVector<TCell> values = { + TCell::Make<i32>(-1), // Int32 + TCell::Make<ui32>(2), // Uint32 + TCell::Make<i64>(-3), // Int64 + TCell::Make<ui64>(4), // Uint64 + TCell::Make<ui8>(5), // Uint8 + TCell::Make<bool>(true), // Bool + TCell::Make<double>(6.66), // Double + TCell::Make<float>(7.77), // Float + TCell::Make<ui16>(8), // Date + TCell::Make<ui32>(9), // Datetime + TCell::Make<ui64>(10), // Timestamp + TCell::Make<i64>(-11), // Interval + TCell::Make<i32>(-12), // Date32 + TCell::Make<i64>(-13), // Datetime64 + TCell::Make<i64>(-14), // Timestamp64 + TCell::Make<i64>(-15), // Interval64 + TCell::Make<std::pair<ui64, ui64>>(decimal), // Decimal + TCell::Make<std::pair<ui64, ui64>>(decimal35), // Decimal + TCell(dynumber.data(), dynumber.size()), // Dynumber + TCell(string.data(), string.size()), // String + TCell(string.data(), string.size()), // Utf8 + TCell(json.data(), json.size()), // Json + TCell(binaryJsonValue.Data(), binaryJsonValue.Size()), // JsonDocument + TCell(uuid, sizeof(uuid)), // Uuid + }; + + const TVector<ui32> keyTags = {1}; + TVector<ui32> valueTags(values.size()); + std::iota(valueTags.begin(), valueTags.end(), 2); + + UploadRow(runtime, "/MyRoot/Table", partitionIdx, keyTags, valueTags, keys, values); + } TPortManager portManager; const ui16 port = portManager.GetPort(); @@ -1716,70 +1718,71 @@ value { env.TestWaitNotification(runtime, txId); TestGetImport(runtime, txId, "/MyRoot"); - - TString expectedJson = TStringBuilder() << "[[[[[" - << "[%true];" // bool - << "[\"" << -12 << "\"];" // date32 - << "[\"" << 8 << "\"];" // date - << "[\"" << -13 << "\"];" // datetime64 - << "[\"" << 9 << "\"];" // datetime - << "[\"" << "555555555555555.123456789" << "\"];" // decimal35 - << "[\"" << "16.17" << "\"];" // decimal - << "[\"" << 6.66 << "\"];" // double - << "[\"" << ".18e2" << "\"];" // dynumber - << "[\"" << 7.77f << "\"];" // float - << "[\"" << -1 << "\"];" // int32 - << "[\"" << -3 << "\"];" // int64 - << "[\"" << -15 << "\"];" // interval64 - << "[\"" << -11 << "\"];" // interval - << "[\"" << "{\\\"key\\\": \\\"value\\\"}" << "\"];" // json - << "[\"" << "{\\\"key\\\":\\\"value\\\"}" << "\"];" // jsondoc - << "[\"" << 1 << "\"];" // key - << "[\"" << "test string" << "\"];" // string - << "[\"" << -14 << "\"];" // timestamp64 - << "[\"" << 10 << "\"];" // timestamp - << "[\"" << 2 << "\"];" // uint32 - << "[\"" << 4 << "\"];" // uint64 - << "[\"" << 5 << "\"];" // uint8 - << "[\"" << "test string" << "\"];" // utf8 - << "[[\"" << "wR7fZX2pskeuVjwCPabujA==" << "\"]]" // uuid - << "]];\%false]]]"; - - const TReadKeyDesc readKeyDesc = {"key", "Uint64", "0"}; - - const TVector<TString> readColumns = { - "key", - "int32_value", - "uint32_value", - "int64_value", - "uint64_value", - "uint8_value", - "bool_value", - "double_value", - "float_value", - "date_value", - "datetime_value", - "timestamp_value", - "interval_value", - "date32_value", - "datetime64_value", - "timestamp64_value", - "interval64_value", - "decimal_value", - "decimal35_value", - "dynumber_value", - "string_value", - "utf8_value", - "json_value", - "jsondoc_value", - "uuid_value", - }; - - auto contentOriginalTable = ReadTable(runtime, TTestTxConfig::FakeHiveTablets, "Table", readKeyDesc, readColumns); - NKqp::CompareYson(expectedJson, contentOriginalTable); - - auto contentRestoredTable = ReadTable(runtime, TTestTxConfig::FakeHiveTablets + 2, commonPrefix ? "Table" : "Restored", readKeyDesc, readColumns); - NKqp::CompareYson(expectedJson, contentRestoredTable); + if (!emptyTable) { + TString expectedJson = TStringBuilder() << "[[[[[" + << "[%true];" // bool + << "[\"" << -12 << "\"];" // date32 + << "[\"" << 8 << "\"];" // date + << "[\"" << -13 << "\"];" // datetime64 + << "[\"" << 9 << "\"];" // datetime + << "[\"" << "555555555555555.123456789" << "\"];" // decimal35 + << "[\"" << "16.17" << "\"];" // decimal + << "[\"" << 6.66 << "\"];" // double + << "[\"" << ".18e2" << "\"];" // dynumber + << "[\"" << 7.77f << "\"];" // float + << "[\"" << -1 << "\"];" // int32 + << "[\"" << -3 << "\"];" // int64 + << "[\"" << -15 << "\"];" // interval64 + << "[\"" << -11 << "\"];" // interval + << "[\"" << "{\\\"key\\\": \\\"value\\\"}" << "\"];" // json + << "[\"" << "{\\\"key\\\":\\\"value\\\"}" << "\"];" // jsondoc + << "[\"" << 1 << "\"];" // key + << "[\"" << "test string" << "\"];" // string + << "[\"" << -14 << "\"];" // timestamp64 + << "[\"" << 10 << "\"];" // timestamp + << "[\"" << 2 << "\"];" // uint32 + << "[\"" << 4 << "\"];" // uint64 + << "[\"" << 5 << "\"];" // uint8 + << "[\"" << "test string" << "\"];" // utf8 + << "[[\"" << "wR7fZX2pskeuVjwCPabujA==" << "\"]]" // uuid + << "]];\%false]]]"; + + const TReadKeyDesc readKeyDesc = {"key", "Uint64", "0"}; + + const TVector<TString> readColumns = { + "key", + "int32_value", + "uint32_value", + "int64_value", + "uint64_value", + "uint8_value", + "bool_value", + "double_value", + "float_value", + "date_value", + "datetime_value", + "timestamp_value", + "interval_value", + "date32_value", + "datetime64_value", + "timestamp64_value", + "interval64_value", + "decimal_value", + "decimal35_value", + "dynumber_value", + "string_value", + "utf8_value", + "json_value", + "jsondoc_value", + "uuid_value", + }; + + auto contentOriginalTable = ReadTable(runtime, TTestTxConfig::FakeHiveTablets, "Table", readKeyDesc, readColumns); + NKqp::CompareYson(expectedJson, contentOriginalTable); + + auto contentRestoredTable = ReadTable(runtime, TTestTxConfig::FakeHiveTablets + 2, commonPrefix ? "Table" : "Restored", readKeyDesc, readColumns); + NKqp::CompareYson(expectedJson, contentRestoredTable); + } } Y_UNIT_TEST(ExportImportOnSupportedDatatypes) { @@ -1794,6 +1797,99 @@ value { ExportImportOnSupportedDatatypesImpl(true, true); } + Y_UNIT_TEST(ExportImportOnSupportedDatatypesEncryptedNoData) { + ExportImportOnSupportedDatatypesImpl(true, true, true); + } + + Y_UNIT_TEST(ZeroLengthEncryptedFileTreatedAsCorrupted) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableParameterizedDecimal(true)); + runtime.GetAppData().FeatureFlags.SetEnableEncryptedExport(true); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"_( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "String" } + KeyColumnNames: ["key"] + )_"); + env.TestWaitNotification(runtime, txId); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock({}, TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + TestExport(runtime, ++txId, "/MyRoot", Sprintf(R"( + ExportToS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + source_path: "/MyRoot" + destination_prefix: "BackupPrefix" + items { + source_path: "Table" + } + encryption_settings { + encryption_algorithm: "ChaCha20-Poly1305" + symmetric_key { + key: "Very very secret export key!!!!!" + } + } + } + )", port)); + env.TestWaitNotification(runtime, txId); + TestGetExport(runtime, txId, "/MyRoot"); + + // Successfully imports + TestImport(runtime, ++txId, "/MyRoot", Sprintf(R"( + ImportFromS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + source_prefix: "BackupPrefix" + destination_path: "/MyRoot/Restored" + encryption_settings { + symmetric_key { + key: "Very very secret export key!!!!!" + } + } + } + )", port)); + env.TestWaitNotification(runtime, txId); + TestGetImport(runtime, txId, "/MyRoot"); + + // Delete data from different files + auto checkFailsIfFileIsEmpty = [&](const TString& fileName) { + TString& data = s3Mock.GetData()[fileName]; + UNIT_ASSERT(!data.empty()); + TString srcData = data; + data.clear(); + + TestImport(runtime, ++txId, "/MyRoot", Sprintf(R"( + ImportFromS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + source_prefix: "BackupPrefix" + destination_path: "/MyRoot/Restored2" + encryption_settings { + symmetric_key { + key: "Very very secret export key!!!!!" + } + } + } + )", port)); + env.TestWaitNotification(runtime, txId); + TestGetImport(runtime, txId, "/MyRoot", Ydb::StatusIds::CANCELLED); + + data = srcData; + }; + + checkFailsIfFileIsEmpty("/BackupPrefix/SchemaMapping/metadata.json.enc"); + checkFailsIfFileIsEmpty("/BackupPrefix/SchemaMapping/mapping.json.enc"); + checkFailsIfFileIsEmpty("/BackupPrefix/001/data_00.csv.enc"); + checkFailsIfFileIsEmpty("/BackupPrefix/001/metadata.json.enc"); + } + Y_UNIT_TEST(ExportImportPg) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableTablePgTypes(true)); |