diff options
author | babenko <babenko@yandex-team.com> | 2024-04-19 23:42:47 +0300 |
---|---|---|
committer | babenko <babenko@yandex-team.com> | 2024-04-19 23:55:53 +0300 |
commit | f96c3c433e52fe4051c0fb14ba23b115a99d8b51 (patch) | |
tree | b7ea6a35f4d32debfff824c7aceaf765f36e9071 | |
parent | a8818e9bb2e10cbd941538218d96926809e1c25f (diff) | |
download | ydb-f96c3c433e52fe4051c0fb14ba23b115a99d8b51.tar.gz |
Avoid passing columnPresenceBuffer around
ed3889fb0e8fb71a059686f7c4133e9058935718
-rw-r--r-- | library/cpp/yt/memory/tls_scratch-inl.h | 28 | ||||
-rw-r--r-- | library/cpp/yt/memory/tls_scratch.h | 20 | ||||
-rw-r--r-- | yt/yt/client/table_client/row_buffer.cpp | 11 | ||||
-rw-r--r-- | yt/yt/client/table_client/row_buffer.h | 6 | ||||
-rw-r--r-- | yt/yt/client/table_client/unversioned_row.cpp | 16 | ||||
-rw-r--r-- | yt/yt/client/table_client/unversioned_row.h | 3 | ||||
-rw-r--r-- | yt/yt/client/table_client/versioned_row.cpp | 13 | ||||
-rw-r--r-- | yt/yt/client/table_client/versioned_row.h | 1 |
8 files changed, 70 insertions, 28 deletions
diff --git a/library/cpp/yt/memory/tls_scratch-inl.h b/library/cpp/yt/memory/tls_scratch-inl.h new file mode 100644 index 0000000000..32c26d67c0 --- /dev/null +++ b/library/cpp/yt/memory/tls_scratch-inl.h @@ -0,0 +1,28 @@ +#ifndef TLS_SCRATH_INL_H_ +#error "Direct inclusion of this file is not allowed, include tls_scratch.h" +// For the sake of sane code completion. +#include "tls_scratch.h" +#endif + +#include <library/cpp/yt/misc/tls.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +template <class T> +TMutableRange<T> GetTlsScratchBuffer(size_t size) +{ + // This is a workaround for std::vector<bool>. + using TBoxed = std::array<T, 1>; + YT_THREAD_LOCAL(std::vector<TBoxed>) tlsVector; + auto& vector = GetTlsRef(tlsVector); + vector.reserve(size); + auto range = TMutableRange(reinterpret_cast<T*>(vector.data()), size); + std::fill(range.begin(), range.end(), T()); + return range; +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/memory/tls_scratch.h b/library/cpp/yt/memory/tls_scratch.h new file mode 100644 index 0000000000..20e306f222 --- /dev/null +++ b/library/cpp/yt/memory/tls_scratch.h @@ -0,0 +1,20 @@ +#pragma once + +#include "range.h" + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +//! Returns a temporary buffer (stored on TLS) of a given size. +//! The content is initialized with default values of T before being returned. 
+template <class T> +TMutableRange<T> GetTlsScratchBuffer(size_t size); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT + +#define TLS_SCRATH_INL_H_ +#include "tls_scratch-inl.h" +#undef TLS_SCRATH_INL_H_ diff --git a/yt/yt/client/table_client/row_buffer.cpp b/yt/yt/client/table_client/row_buffer.cpp index dddcbd9ce3..40a2703ea5 100644 --- a/yt/yt/client/table_client/row_buffer.cpp +++ b/yt/yt/client/table_client/row_buffer.cpp @@ -115,14 +115,14 @@ TMutableUnversionedRow TRowBuffer::CaptureAndPermuteRow( const TTableSchema& tableSchema, int schemafulColumnCount, const TNameTableToSchemaIdMapping& idMapping, - std::vector<bool>* columnPresenceBuffer, + bool validateDuplicateAndRequiredValueColumns, bool preserveIds, std::optional<TUnversionedValue> addend) { int valueCount = schemafulColumnCount; - if (columnPresenceBuffer) { - ValidateDuplicateAndRequiredValueColumns(row, tableSchema, idMapping, columnPresenceBuffer); + if (validateDuplicateAndRequiredValueColumns) { + ValidateDuplicateAndRequiredValueColumns(row, tableSchema, idMapping); } for (const auto& value : row) { @@ -212,7 +212,7 @@ TMutableVersionedRow TRowBuffer::CaptureAndPermuteRow( TVersionedRow row, const TTableSchema& tableSchema, const TNameTableToSchemaIdMapping& idMapping, - std::vector<bool>* columnPresenceBuffer, + bool validateDuplicateAndRequiredValueColumns, bool allowMissingKeyColumns) { int keyColumnCount = tableSchema.GetKeyColumnCount(); @@ -242,12 +242,11 @@ TMutableVersionedRow TRowBuffer::CaptureAndPermuteRow( writeTimestamps.erase(std::unique(writeTimestamps.begin(), writeTimestamps.end()), writeTimestamps.end()); int writeTimestampCount = static_cast<int>(writeTimestamps.size()); - if (columnPresenceBuffer) { + if (validateDuplicateAndRequiredValueColumns) { ValidateDuplicateAndRequiredValueColumns( row, tableSchema, idMapping, - columnPresenceBuffer, writeTimestamps.data(), writeTimestampCount); } diff --git 
a/yt/yt/client/table_client/row_buffer.h b/yt/yt/client/table_client/row_buffer.h index 4609604673..2f9a8d4d22 100644 --- a/yt/yt/client/table_client/row_buffer.h +++ b/yt/yt/client/table_client/row_buffer.h @@ -89,9 +89,9 @@ public: const TTableSchema& tableSchema, int schemafulColumnCount, const TNameTableToSchemaIdMapping& idMapping, - std::vector<bool>* columnPresenceBuffer, + bool validateDuplicateAndRequiredValueColumns, bool preserveIds = false, - std::optional<TUnversionedValue> addend = std::nullopt); + std::optional<TUnversionedValue> addend = {}); //! Captures the row applying #idMapping to value ids. //! #idMapping must be identity for key columns. @@ -100,7 +100,7 @@ public: TVersionedRow row, const TTableSchema& tableSchema, const TNameTableToSchemaIdMapping& idMapping, - std::vector<bool>* columnPresenceBuffer, + bool validateDuplicateAndRequiredValueColumns, bool allowMissingKeyColumns = false); i64 GetSize() const; diff --git a/yt/yt/client/table_client/unversioned_row.cpp b/yt/yt/client/table_client/unversioned_row.cpp index 18cfd89d7e..cbf238516c 100644 --- a/yt/yt/client/table_client/unversioned_row.cpp +++ b/yt/yt/client/table_client/unversioned_row.cpp @@ -22,6 +22,8 @@ #include <library/cpp/yt/misc/hash.h> +#include <library/cpp/yt/memory/tls_scratch.h> + #include <library/cpp/yt/farmhash/farm_hash.h> #include <library/cpp/yt/coding/varint.h> @@ -1235,13 +1237,9 @@ void ValidateClientDataRow( void ValidateDuplicateAndRequiredValueColumns( TUnversionedRow row, const TTableSchema& schema, - const TNameTableToSchemaIdMapping& idMapping, - std::vector<bool>* columnPresenceBuffer) + const TNameTableToSchemaIdMapping& idMapping) { - auto& columnSeen = *columnPresenceBuffer; - YT_VERIFY(std::ssize(columnSeen) >= schema.GetColumnCount()); - std::fill(columnSeen.begin(), columnSeen.end(), 0); - + auto columnSeenFlags = GetTlsScratchBuffer<bool>(schema.GetColumnCount()); for (const auto& value : row) { int mappedId = ApplyIdMapping(value, &idMapping); 
if (mappedId < 0) { @@ -1249,17 +1247,17 @@ void ValidateDuplicateAndRequiredValueColumns( } const auto& column = schema.Columns()[mappedId]; - if (columnSeen[mappedId]) { + if (columnSeenFlags[mappedId]) { THROW_ERROR_EXCEPTION( NTableClient::EErrorCode::DuplicateColumnInSchema, "Duplicate column %v in table schema", column.GetDiagnosticNameString()); } - columnSeen[mappedId] = true; + columnSeenFlags[mappedId] = true; } for (int index = schema.GetKeyColumnCount(); index < schema.GetColumnCount(); ++index) { - if (!columnSeen[index] && schema.Columns()[index].Required()) { + if (!columnSeenFlags[index] && schema.Columns()[index].Required()) { THROW_ERROR_EXCEPTION( NTableClient::EErrorCode::MissingRequiredColumnInSchema, "Missing required column %v in table schema", diff --git a/yt/yt/client/table_client/unversioned_row.h b/yt/yt/client/table_client/unversioned_row.h index f23a178366..24fd6a9288 100644 --- a/yt/yt/client/table_client/unversioned_row.h +++ b/yt/yt/client/table_client/unversioned_row.h @@ -439,8 +439,7 @@ void ValidateClientDataRow( void ValidateDuplicateAndRequiredValueColumns( TUnversionedRow row, const TTableSchema& schema, - const TNameTableToSchemaIdMapping& idMapping, - std::vector<bool>* columnPresenceBuffer); + const TNameTableToSchemaIdMapping& idMapping); //! Checks that #row contains write lock for non-key columns and returns true if any non-key columns encountered. 
bool ValidateNonKeyColumnsAgainstLock( diff --git a/yt/yt/client/table_client/versioned_row.cpp b/yt/yt/client/table_client/versioned_row.cpp index e06544fac2..013a3d1bee 100644 --- a/yt/yt/client/table_client/versioned_row.cpp +++ b/yt/yt/client/table_client/versioned_row.cpp @@ -6,6 +6,8 @@ #include <library/cpp/yt/coding/varint.h> +#include <library/cpp/yt/memory/tls_scratch.h> + #include <numeric> namespace NYT::NTableClient { @@ -220,7 +222,6 @@ void ValidateDuplicateAndRequiredValueColumns( TVersionedRow row, const TTableSchema& schema, const TNameTableToSchemaIdMapping& idMapping, - std::vector<bool>* columnPresenceBuffer, const TTimestamp* writeTimestamps, int writeTimestampCount) { @@ -228,9 +229,7 @@ void ValidateDuplicateAndRequiredValueColumns( return; } - auto& columnSeen = *columnPresenceBuffer; - YT_VERIFY(std::ssize(columnSeen) >= schema.GetColumnCount()); - std::fill(columnSeen.begin(), columnSeen.end(), 0); + auto columnSeenFlags = GetTlsScratchBuffer<bool>(schema.GetColumnCount()); for (const auto *valueGroupBeginIt = row.BeginValues(), *valueGroupEndIt = valueGroupBeginIt; valueGroupBeginIt != row.EndValues(); @@ -246,11 +245,11 @@ void ValidateDuplicateAndRequiredValueColumns( } const auto& column = schema.Columns()[mappedId]; - if (columnSeen[mappedId]) { + if (columnSeenFlags[mappedId]) { THROW_ERROR_EXCEPTION("Duplicate value group %v in versioned row", column.GetDiagnosticNameString()); } - columnSeen[mappedId] = true; + columnSeenFlags[mappedId] = true; if (column.Required()) { auto mismatch = std::mismatch( @@ -279,7 +278,7 @@ void ValidateDuplicateAndRequiredValueColumns( } for (int index = schema.GetKeyColumnCount(); index < schema.GetColumnCount(); ++index) { - if (!columnSeen[index] && schema.Columns()[index].Required()) { + if (!columnSeenFlags[index] && schema.Columns()[index].Required()) { THROW_ERROR_EXCEPTION("Missing values for required column %v", schema.Columns()[index].GetDiagnosticNameString()); } diff --git 
a/yt/yt/client/table_client/versioned_row.h b/yt/yt/client/table_client/versioned_row.h index 2ab7a9874f..8d72048d0c 100644 --- a/yt/yt/client/table_client/versioned_row.h +++ b/yt/yt/client/table_client/versioned_row.h @@ -294,7 +294,6 @@ void ValidateDuplicateAndRequiredValueColumns( TVersionedRow row, const TTableSchema& schema, const TNameTableToSchemaIdMapping& idMapping, - std::vector<bool>* columnPresenceBuffer, const TTimestamp* writeTimestamps, int writeTimestampCount); |