diff options
author | kungurtsev <[email protected]> | 2025-09-19 12:28:22 +0200 |
---|---|---|
committer | kungasc <[email protected]> | 2025-09-23 11:56:20 +0300 |
commit | 123b6376ce90c698f9c60e65f6b9639c5f03636c (patch) | |
tree | 3a2951b8fbd6b1c67f9e3770dd48f7b70484f3a8 | |
parent | ee3cb799508e266feaecb150d9baf075dd586714 (diff) |
DataShard fulltext index build scan (#25028)
-rw-r--r-- | ydb/core/base/fulltext.cpp | 42 | ||||
-rw-r--r-- | ydb/core/base/fulltext.h | 4 | ||||
-rw-r--r-- | ydb/core/base/table_index.h | 1 | ||||
-rw-r--r-- | ydb/core/base/ut/fulltext_ut.cpp | 42 | ||||
-rw-r--r-- | ydb/core/protos/tx_datashard.proto | 37 | ||||
-rw-r--r-- | ydb/core/tx/datashard/build_index/fulltext.cpp | 415 | ||||
-rw-r--r-- | ydb/core/tx/datashard/build_index/secondary_index.cpp | 13 | ||||
-rw-r--r-- | ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp | 367 | ||||
-rw-r--r-- | ydb/core/tx/datashard/build_index/ut/ya.make | 1 | ||||
-rw-r--r-- | ydb/core/tx/datashard/datashard.h | 15 | ||||
-rw-r--r-- | ydb/core/tx/datashard/datashard_impl.h | 4 | ||||
-rw-r--r-- | ydb/core/tx/datashard/ya.make | 7 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp | 2 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp | 2 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/schemeshard_utils.h | 32 | ||||
-rw-r--r-- | ydb/library/services/services.proto | 1 |
16 files changed, 929 insertions, 56 deletions
diff --git a/ydb/core/base/fulltext.cpp b/ydb/core/base/fulltext.cpp index a23e83f2955..51ced489aff 100644 --- a/ydb/core/base/fulltext.cpp +++ b/ydb/core/base/fulltext.cpp @@ -137,27 +137,40 @@ TVector<TString> Analyze(const TString& text, const Ydb::Table::FulltextIndexSet return tokens; } -bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString& error) { +bool ValidateSettings(const NProtoBuf::RepeatedPtrField<TString>& keyColumns, const Ydb::Table::FulltextIndexSettings& settings, TString& error) { if (!settings.has_layout() || settings.layout() == Ydb::Table::FulltextIndexSettings::LAYOUT_UNSPECIFIED) { error = "layout should be set"; return false; } - if (settings.columns().size() != 1) { - error = TStringBuilder() << "fulltext index should have single column settings" - << " but have " << settings.columns().size() << " of them"; - return false; - } - for (auto column : settings.columns()) { if (!column.has_column()) { - error = "column should be set"; + error = "fulltext index settings should have a column name"; return false; } if (!column.has_analyzers()) { - error = "column analyzers should be set"; + error = "fulltext index settings should have analyzers"; return false; } + } + + if (keyColumns.size() != 1) { + error = TStringBuilder() << "fulltext index should have a single text key column" + << " but have " << keyColumns.size() << " of them"; + return false; + } + if (settings.columns().size() != 1) { + error = TStringBuilder() << "fulltext index should have a single text key column settings" + << " but have " << settings.columns().size() << " of them"; + return false; + } + if (keyColumns.at(0) != settings.columns().at(0).column()) { + error = TStringBuilder() << "fulltext index should have a single text key column " << keyColumns.at(0) << " settings" + << " but have " << settings.columns().at(0).column(); + return false; + } + + for (auto column : settings.columns()) { if (!ValidateSettings(column.analyzers(), error)) { return false; } @@ -167,7 +180,7 @@ bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString return true; } -Ydb::Table::FulltextIndexSettings FillSettings(const TString& column, const TVector<std::pair<TString, TString>>& settings, TString& error) { +Ydb::Table::FulltextIndexSettings FillSettings(const TString& keyColumn, const TVector<std::pair<TString, TString>>& settings, TString& error) { Ydb::Table::FulltextIndexSettings result; Ydb::Table::FulltextIndexSettings::Analyzers resultAnalyzers; @@ -209,11 +222,16 @@ Ydb::Table::FulltextIndexSettings FillSettings(const TString& column, const TVec { // only single-columned index is supported for now auto columnAnalyzers = result.add_columns(); - columnAnalyzers->set_column(column); + columnAnalyzers->set_column(keyColumn); columnAnalyzers->mutable_analyzers()->CopyFrom(resultAnalyzers); } - ValidateSettings(result, error); + { + NProtoBuf::RepeatedPtrField<TString> keyColumns; + TString keyColumn_ = keyColumn; + keyColumns.Add(std::move(keyColumn_)); + ValidateSettings(keyColumns, result, error); + } return result; } diff --git a/ydb/core/base/fulltext.h b/ydb/core/base/fulltext.h index b65e3043622..849b52bfc9e 100644 --- a/ydb/core/base/fulltext.h +++ b/ydb/core/base/fulltext.h @@ -8,7 +8,7 @@ namespace NKikimr::NFulltext { TVector<TString> Analyze(const TString& text, const Ydb::Table::FulltextIndexSettings::Analyzers& settings); -bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString& error); -Ydb::Table::FulltextIndexSettings FillSettings(const TString& column, const TVector<std::pair<TString, TString>>& values, TString& error); +bool ValidateSettings(const NProtoBuf::RepeatedPtrField<TString>& keyColumns, const Ydb::Table::FulltextIndexSettings& settings, TString& error); +Ydb::Table::FulltextIndexSettings FillSettings(const TString& keyColumn, const TVector<std::pair<TString, TString>>& values, TString& error); } diff --git a/ydb/core/base/table_index.h b/ydb/core/base/table_index.h index 201e40ae99f..a251122d0b5 100644 --- a/ydb/core/base/table_index.h +++ b/ydb/core/base/table_index.h @@ -83,6 +83,7 @@ TClusterId SetPostingParentFlag(TClusterId parent); namespace NFulltext { // TODO: support utf-8 in fulltext index + inline constexpr auto TokenType = Ydb::Type::STRING; inline constexpr const char* TokenTypeName = "String"; inline constexpr const char* TokenColumn = "__ydb_token"; diff --git a/ydb/core/base/ut/fulltext_ut.cpp b/ydb/core/base/ut/fulltext_ut.cpp index 0824748fe0d..d3cb620dca9 100644 --- a/ydb/core/base/ut/fulltext_ut.cpp +++ b/ydb/core/base/ut/fulltext_ut.cpp @@ -10,27 +10,51 @@ Y_UNIT_TEST_SUITE(NFulltext) { Ydb::Table::FulltextIndexSettings settings; TString error; - UNIT_ASSERT(!ValidateSettings(settings, error)); + NProtoBuf::RepeatedPtrField<TString> keyColumns; + keyColumns.Add("text"); + + UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error)); UNIT_ASSERT_VALUES_EQUAL(error, "layout should be set"); settings.set_layout(Ydb::Table::FulltextIndexSettings::FLAT); - UNIT_ASSERT(!ValidateSettings(settings, error)); - UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have single column settings but have 0 of them"); + UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error)); + UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column settings but have 0 of them"); auto columnSettings = settings.add_columns(); - UNIT_ASSERT(!ValidateSettings(settings, error)); - UNIT_ASSERT_VALUES_EQUAL(error, "column should be set"); + UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error)); + UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index settings should have a column name"); columnSettings->set_column("text"); - UNIT_ASSERT(!ValidateSettings(settings, error)); - UNIT_ASSERT_VALUES_EQUAL(error, "column analyzers should be set"); + UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error)); + UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index settings should have analyzers"); auto columnAnalyzers = columnSettings->mutable_analyzers(); - UNIT_ASSERT(!ValidateSettings(settings, error)); + UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error)); UNIT_ASSERT_VALUES_EQUAL(error, "tokenizer should be set"); columnAnalyzers->set_tokenizer(Ydb::Table::FulltextIndexSettings::STANDARD); - UNIT_ASSERT_C(ValidateSettings(settings, error), error); + { + NProtoBuf::RepeatedPtrField<TString> keyColumns; + UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error); + UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column but have 0 of them"); + } + + { + NProtoBuf::RepeatedPtrField<TString> keyColumns; + keyColumns.Add("text2"); + UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error); + UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column text2 settings but have text"); + } + + { + NProtoBuf::RepeatedPtrField<TString> keyColumns; + keyColumns.Add("text"); + keyColumns.Add("text"); + UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error); + UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column but have 2 of them"); + } + + UNIT_ASSERT_C(ValidateSettings(keyColumns, settings, error), error); UNIT_ASSERT_VALUES_EQUAL(error, ""); } diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto index 89cbbdef90d..f032d20d8b0 100644 --- a/ydb/core/protos/tx_datashard.proto +++ b/ydb/core/protos/tx_datashard.proto @@ -1787,6 +1787,43 @@ message TEvPrefixKMeansResponse { optional NKikimrIndexBuilder.TMeteringStats MeteringStats = 12; } +message TEvBuildFulltextIndexRequest { + optional uint64 Id = 1; + + optional uint64 TabletId = 2; + optional NKikimrProto.TPathID PathId = 3; + + optional uint64 SnapshotTxId = 4; + optional uint64 SnapshotStep = 5; + + optional uint64 SeqNoGeneration = 6; + optional uint64 SeqNoRound = 7; + + optional Ydb.Table.FulltextIndexSettings Settings = 8; + + optional string IndexName = 9; + + repeated string KeyColumns = 10; + repeated string DataColumns = 11; + + optional NKikimrIndexBuilder.TIndexBuildScanSettings ScanSettings = 12; +} + +message TEvBuildFulltextIndexResponse { + optional uint64 Id = 1; + + optional uint64 TabletId = 2; + optional NKikimrProto.TPathID PathId = 3; + + optional uint64 RequestSeqNoGeneration = 4; + optional uint64 RequestSeqNoRound = 5; + + optional NKikimrIndexBuilder.EBuildStatus Status = 6; + repeated Ydb.Issue.IssueMessage Issues = 7; + + optional NKikimrIndexBuilder.TMeteringStats MeteringStats = 8; +} + message TEvCdcStreamScanRequest { message TLimits { optional uint32 BatchMaxBytes = 1 [default = 512000]; diff --git a/ydb/core/tx/datashard/build_index/fulltext.cpp b/ydb/core/tx/datashard/build_index/fulltext.cpp new file mode 100644 index 00000000000..3af00dd7f66 --- /dev/null +++ b/ydb/core/tx/datashard/build_index/fulltext.cpp @@ -0,0 +1,415 @@ +#include "common_helper.h" +#include "../datashard_impl.h" +#include "../scan_common.h" +#include "../upload_stats.h" +#include "../buffer_data.h" + +#include <ydb/core/base/appdata.h> +#include <ydb/core/base/counters.h> +#include <ydb/core/base/fulltext.h> +#include <ydb/core/kqp/common/kqp_types.h> +#include <ydb/core/scheme/scheme_tablecell.h> + +#include <ydb/core/tx/tx_proxy/proxy.h> +#include <ydb/core/tx/tx_proxy/upload_rows.h> + +#include <ydb/core/ydb_convert/table_description.h> +#include <ydb/core/ydb_convert/ydb_convert.h> +#include <yql/essentials/public/issue/yql_issue_message.h> + +#include <util/generic/algorithm.h> +#include <util/string/builder.h> + +namespace NKikimr::NDataShard { +using namespace NTableIndex::NFulltext; +using namespace NKikimr::NFulltext; + +class TBuildFulltextIndexScan: public TActor<TBuildFulltextIndexScan>, public IActorExceptionHandler, public NTable::IScan { + IDriver* Driver = nullptr; + + ui64 TabletId = 0; + ui64 BuildId = 0; + + ui64 ReadRows = 0; + ui64 ReadBytes = 0; + + TTags ScanTags; + TString TextColumn; + Ydb::Table::FulltextIndexSettings::Analyzers TextAnalyzers; + + TBatchRowsUploader Uploader; + TBufferData* UploadBuf = nullptr; + + const NKikimrTxDataShard::TEvBuildFulltextIndexRequest Request; + const TActorId ResponseActorId; + const TAutoPtr<TEvDataShard::TEvBuildFulltextIndexResponse> Response; + +public: + static constexpr NKikimrServices::TActivity::EType ActorActivityType() + { + return NKikimrServices::TActivity::BUILD_FULLTEXT_INDEX; + } + + TBuildFulltextIndexScan(ui64 tabletId, const TUserTable& table, NKikimrTxDataShard::TEvBuildFulltextIndexRequest request, + const TActorId& responseActorId, TAutoPtr<TEvDataShard::TEvBuildFulltextIndexResponse>&& response) + : TActor{&TThis::StateWork} + , TabletId(tabletId) + , BuildId{request.GetId()} + , Uploader(request.GetScanSettings()) + , Request(std::move(request)) + , ResponseActorId{responseActorId} + , Response{std::move(response)} + { + LOG_I("Create " << Debug()); + + Y_ENSURE(Request.settings().columns().size() == 1); + TextColumn = Request.settings().columns().at(0).column(); + TextAnalyzers = Request.settings().columns().at(0).analyzers(); + Y_ENSURE(Request.GetKeyColumns().size() == 1); + Y_ENSURE(Request.GetKeyColumns().at(0) == TextColumn); + + auto tags = GetAllTags(table); + auto types = GetAllTypes(table); + + { + ScanTags.push_back(tags.at(TextColumn)); + + for (auto dataColumn : Request.GetDataColumns()) { + if (dataColumn != TextColumn) { + ScanTags.push_back(tags.at(dataColumn)); + } + } + } + + { + auto uploadTypes = std::make_shared<NTxProxy::TUploadTypes>(); + auto addType = [&](const auto& column) { + auto it = types.find(column); + if (it != types.end()) { + Ydb::Type type; + NScheme::ProtoFromTypeInfo(it->second, type); + uploadTypes->emplace_back(it->first, type); + types.erase(it); + } + }; + { + Ydb::Type type; + type.set_type_id(TokenType); + uploadTypes->emplace_back(TokenColumn, type); + } + for (const auto& column : table.KeyColumnIds) { + addType(table.Columns.at(column).Name); + } + for (auto dataColumn : Request.GetDataColumns()) { + addType(dataColumn); + } + UploadBuf = Uploader.AddDestination(Request.GetIndexName(), std::move(uploadTypes)); + } + } + + TInitialState Prepare(IDriver* driver, TIntrusiveConstPtr<TScheme>) final + { + TActivationContext::AsActorContext().RegisterWithSameMailbox(this); + LOG_I("Prepare " << Debug()); + + Driver = driver; + Uploader.SetOwner(SelfId()); + + return {EScan::Feed, {}}; + } + + EScan Seek(TLead& lead, ui64 seq) final + { + LOG_T("Seek " << seq << " " << Debug()); + + if (seq) { + return Uploader.CanFinish() + ? EScan::Final + : EScan::Sleep; + } + + lead.To(ScanTags, {}, NTable::ESeek::Lower); + + return EScan::Feed; + } + + EScan Feed(TArrayRef<const TCell> key, const TRow& row) final + { + // LOG_T("Feed " << Debug()); + + ++ReadRows; + ReadBytes += CountRowCellBytes(key, *row); + + TVector<TCell> uploadKey(::Reserve(key.size() + 1)); + TVector<TCell> uploadValue(::Reserve(Request.GetDataColumns().size())); + + TString text((*row).at(0).AsBuf()); + auto tokens = Analyze(text, TextAnalyzers); + for (const auto& token : tokens) { + uploadKey.clear(); + uploadKey.push_back(TCell(token)); + uploadKey.insert(uploadKey.end(), key.begin(), key.end()); + + uploadValue.clear(); + size_t index = 1; // skip text column + for (auto dataColumn : Request.GetDataColumns()) { + if (dataColumn != TextColumn) { + uploadValue.push_back(row.Get(index++)); + } else { + uploadValue.push_back(TCell(text)); + } + } + + UploadBuf->AddRow(uploadKey, uploadValue); + } + + return Uploader.ShouldWaitUpload() ? EScan::Sleep : EScan::Feed; + } + + EScan PageFault() final + { + LOG_T("PageFault " << Debug()); + return EScan::Feed; + } + + EScan Exhausted() final + { + LOG_T("Exhausted " << Debug()); + + // call Seek to wait uploads + return EScan::Reset; + } + + TAutoPtr<IDestructable> Finish(const std::exception& exc) final + { + Uploader.AddIssue(exc); + return Finish(EStatus::Exception); + } + + TAutoPtr<IDestructable> Finish(EStatus status) final + { + auto& record = Response->Record; + record.MutableMeteringStats()->SetReadRows(ReadRows); + record.MutableMeteringStats()->SetReadBytes(ReadBytes); + record.MutableMeteringStats()->SetCpuTimeUs(Driver->GetTotalCpuTimeUs()); + + Uploader.Finish(record, status); + + if (Response->Record.GetStatus() == NKikimrIndexBuilder::DONE) { + LOG_N("Done " << Debug() << " " << Response->Record.ShortDebugString()); + } else { + LOG_E("Failed " << Debug() << " " << Response->Record.ShortDebugString()); + } + Send(ResponseActorId, Response.Release()); + + Driver = nullptr; + this->PassAway(); + return nullptr; + } + + bool OnUnhandledException(const std::exception& exc) final + { + if (!Driver) { + return false; + } + Driver->Throw(exc); + return true; + } + + void Describe(IOutputStream& out) const final + { + out << Debug(); + } + +protected: + STFUNC(StateWork) + { + switch (ev->GetTypeRewrite()) { + HFunc(TEvTxUserProxy::TEvUploadRowsResponse, Handle); + CFunc(TEvents::TSystem::Wakeup, HandleWakeup); + default: + LOG_E("StateWork unexpected event type: " << ev->GetTypeRewrite() + << " event: " << ev->ToString() << " " << Debug()); + } + } + + void HandleWakeup(const NActors::TActorContext& /*ctx*/) + { + LOG_D("Retry upload " << Debug()); + + Uploader.RetryUpload(); + } + + void Handle(TEvTxUserProxy::TEvUploadRowsResponse::TPtr& ev, const TActorContext& ctx) + { + LOG_D("Handle TEvUploadRowsResponse " << Debug() + << " ev->Sender: " << ev->Sender.ToString()); + + if (!Driver) { + return; + } + + Uploader.Handle(ev); + + if (Uploader.GetUploadStatus().IsSuccess()) { + Driver->Touch(EScan::Feed); + return; + } + + if (auto retryAfter = Uploader.GetRetryAfter(); retryAfter) { + LOG_N("Got retriable error, " << Debug() << " " << Uploader.GetUploadStatus().ToString()); + ctx.Schedule(*retryAfter, new TEvents::TEvWakeup()); + return; + } + + LOG_N("Got error, abort scan, " << Debug() << " " << Uploader.GetUploadStatus().ToString()); + + Driver->Touch(EScan::Final); + } + + TString Debug() const + { + return TStringBuilder() << "TBuildFulltextIndexScan TabletId: " << TabletId << " Id: " << BuildId + << " " << Uploader.Debug(); + } +}; + +class TDataShard::TTxHandleSafeBuildFulltextIndexScan final: public NTabletFlatExecutor::TTransactionBase<TDataShard> { +public: + TTxHandleSafeBuildFulltextIndexScan(TDataShard* self, TEvDataShard::TEvBuildFulltextIndexRequest::TPtr&& ev) + : TTransactionBase(self) + , Ev(std::move(ev)) + { + } + + bool Execute(TTransactionContext&, const TActorContext& ctx) final + { + Self->HandleSafe(Ev, ctx); + return true; + } + + void Complete(const TActorContext&) final + { + } + +private: + TEvDataShard::TEvBuildFulltextIndexRequest::TPtr Ev; +}; + +void TDataShard::Handle(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev, const TActorContext&) +{ + Execute(new TTxHandleSafeBuildFulltextIndexScan(this, std::move(ev))); +} + +void TDataShard::HandleSafe(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev, const TActorContext& ctx) +{ + auto& request = ev->Get()->Record; + const ui64 id = request.GetId(); + TRowVersion rowVersion(request.GetSnapshotStep(), request.GetSnapshotTxId()); + TScanRecord::TSeqNo seqNo = {request.GetSeqNoGeneration(), request.GetSeqNoRound()}; + + try { + auto response = MakeHolder<TEvDataShard::TEvBuildFulltextIndexResponse>(); + FillScanResponseCommonFields(*response, id, TabletID(), seqNo); + + LOG_N("Starting TBuildFulltextIndexScan TabletId: " << TabletID() + << " " << request.ShortDebugString() + << " row version " << rowVersion); + + // Note: it's very unlikely that we have volatile txs before this snapshot + if (VolatileTxManager.HasVolatileTxsAtSnapshot(rowVersion)) { + VolatileTxManager.AttachWaitingSnapshotEvent(rowVersion, std::unique_ptr<IEventHandle>(ev.Release())); + return; + } + + auto badRequest = [&](const TString& error) { + response->Record.SetStatus(NKikimrIndexBuilder::EBuildStatus::BAD_REQUEST); + auto issue = response->Record.AddIssues(); + issue->set_severity(NYql::TSeverityIds::S_ERROR); + issue->set_message(error); + }; + auto trySendBadRequest = [&] { + if (response->Record.GetStatus() == NKikimrIndexBuilder::EBuildStatus::BAD_REQUEST) { + LOG_E("Rejecting TBuildFulltextIndexScan bad request TabletId: " << TabletID() + << " " << request.ShortDebugString() + << " with response " << response->Record.ShortDebugString()); + ctx.Send(ev->Sender, std::move(response)); + return true; + } else { + return false; + } + }; + + // 1. Validating table and path existence + if (request.GetTabletId() != TabletID()) { + badRequest(TStringBuilder() << "Wrong shard " << request.GetTabletId() << " this is " << TabletID()); + } + if (!IsStateActive()) { + badRequest(TStringBuilder() << "Shard " << TabletID() << " is " << State << " and not ready for requests"); + } + const auto pathId = TPathId::FromProto(request.GetPathId()); + const auto* userTableIt = GetUserTables().FindPtr(pathId.LocalPathId); + if (!userTableIt) { + badRequest(TStringBuilder() << "Unknown table id: " << pathId.LocalPathId); + } + if (trySendBadRequest()) { + return; + } + const auto& userTable = **userTableIt; + + // 2. Validating request fields + if (!request.HasSnapshotStep() || !request.HasSnapshotTxId()) { + badRequest(TStringBuilder() << "Missing snapshot"); + } else { + const TSnapshotKey snapshotKey(pathId, rowVersion.Step, rowVersion.TxId); + if (!SnapshotManager.FindAvailable(snapshotKey)) { + badRequest(TStringBuilder() << "Unknown snapshot for path id " << pathId.OwnerId << ":" << pathId.LocalPathId + << ", snapshot step is " << snapshotKey.Step << ", snapshot tx is " << snapshotKey.TxId); + } + } + + if (!request.GetIndexName()) { + badRequest(TStringBuilder() << "Empty index table name"); + } + + auto tags = GetAllTags(userTable); + for (auto keyColumn : request.GetKeyColumns()) { + if (!tags.contains(keyColumn)) { + badRequest(TStringBuilder() << "Unknown key column: " << keyColumn); + } + } + for (auto dataColumn : request.GetDataColumns()) { + if (!tags.contains(dataColumn)) { + badRequest(TStringBuilder() << "Unknown data column: " << dataColumn); + } + } + + if (trySendBadRequest()) { + return; + } + + // 3. Validating fulltext index settings + if (!request.HasSettings()) { + badRequest(TStringBuilder() << "Missing fulltext index settings"); + } else { + TString error; + if (!NKikimr::NFulltext::ValidateSettings(request.keycolumns(), request.GetSettings(), error)) { + badRequest(error); + } + } + + if (trySendBadRequest()) { + return; + } + + // 4. Creating scan + TAutoPtr<NTable::IScan> scan = new TBuildFulltextIndexScan(TabletID(), userTable, + request, ev->Sender, std::move(response)); + + StartScan(this, std::move(scan), id, seqNo, rowVersion, userTable.LocalTid); + } catch (const std::exception& exc) { + FailScan<TEvDataShard::TEvBuildFulltextIndexResponse>(id, TabletID(), ev->Sender, seqNo, exc, "TBuildFulltextIndexScan"); + } +} + +} diff --git a/ydb/core/tx/datashard/build_index/secondary_index.cpp b/ydb/core/tx/datashard/build_index/secondary_index.cpp index 20e68c03a5b..40342b06156 100644 --- a/ydb/core/tx/datashard/build_index/secondary_index.cpp +++ b/ydb/core/tx/datashard/build_index/secondary_index.cpp @@ -577,12 +577,13 @@ void TDataShard::HandleSafe(TEvDataShard::TEvBuildIndexCreateRequest::TPtr& ev, // 2. Validating request fields if (!request.HasSnapshotStep() || !request.HasSnapshotTxId()) { - badRequest(TStringBuilder() << "Empty snapshot"); - } - const TSnapshotKey snapshotKey(tableId.PathId, rowVersion.Step, rowVersion.TxId); - if (!SnapshotManager.FindAvailable(snapshotKey)) { - badRequest(TStringBuilder() << "Unknown snapshot for path id " << tableId.PathId.OwnerId << ":" << tableId.PathId.LocalPathId - << ", snapshot step is " << snapshotKey.Step << ", snapshot tx is " << snapshotKey.TxId); + badRequest(TStringBuilder() << "Missing snapshot"); + } else { + const TSnapshotKey snapshotKey(tableId.PathId, rowVersion.Step, rowVersion.TxId); + if (!SnapshotManager.FindAvailable(snapshotKey)) { + badRequest(TStringBuilder() << "Unknown snapshot for path id " << tableId.PathId.OwnerId << ":" << tableId.PathId.LocalPathId + << ", snapshot step is " << snapshotKey.Step << ", snapshot tx is " << snapshotKey.TxId); + } } TSerializedTableRange requestedRange; diff --git a/ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp b/ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp new file mode 100644 index 00000000000..39a9b67eee8 --- /dev/null +++ b/ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp @@ -0,0 +1,367 @@ +#include "ut_helpers.h" + +#include <ydb/core/base/table_index.h> +#include <ydb/core/protos/index_builder.pb.h> +#include <ydb/core/testlib/test_client.h> +#include <ydb/core/tx/datashard/ut_common/datashard_ut_common.h> +#include <ydb/core/tx/schemeshard/schemeshard.h> +#include <ydb/core/tx/tx_proxy/proxy.h> +#include <ydb/core/tx/tx_proxy/upload_rows.h> + +#include <yql/essentials/public/issue/yql_issue_message.h> + +#include <library/cpp/testing/unittest/registar.h> + +namespace NKikimr { +using namespace Tests; +using Ydb::Table::FulltextIndexSettings; +using namespace NTableIndex::NFulltext; + +static std::atomic<ui64> sId = 1; +static const TString kMainTable = "/Root/table-main"; +static const TString kIndexTable = "/Root/table-index"; + +Y_UNIT_TEST_SUITE(TTxDataShardBuildFulltextIndexScan) { + + ui64 FillRequest(Tests::TServer::TPtr server, TActorId sender, + NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request, + std::function<void(NKikimrTxDataShard::TEvBuildFulltextIndexRequest&)> setupRequest) + { + auto id = sId.fetch_add(1, std::memory_order_relaxed); + + auto snapshot = CreateVolatileSnapshot(server, {kMainTable}); + auto datashards = GetTableShards(server, sender, kMainTable); + TTableId tableId = ResolveTableId(server, sender, kMainTable); + + UNIT_ASSERT(datashards.size() == 1); + + request.SetId(1); + request.SetSeqNoGeneration(id); + request.SetSeqNoRound(1); + + request.SetTabletId(datashards[0]); + tableId.PathId.ToProto(request.MutablePathId()); + + request.SetSnapshotTxId(snapshot.TxId); + request.SetSnapshotStep(snapshot.Step); + + FulltextIndexSettings settings; + settings.set_layout(FulltextIndexSettings::FLAT); + auto column = settings.add_columns(); + column->set_column("text"); + column->mutable_analyzers()->set_tokenizer(FulltextIndexSettings::WHITESPACE); + *request.MutableSettings() = settings; + + request.SetIndexName(kIndexTable); + + request.AddKeyColumns("text"); + + setupRequest(request); + + return datashards[0]; + } + + void DoBadRequest(Tests::TServer::TPtr server, TActorId sender, + std::function<void(NKikimrTxDataShard::TEvBuildFulltextIndexRequest&)> setupRequest, + const TString& expectedError, bool expectedErrorSubstring = false, NKikimrIndexBuilder::EBuildStatus expectedStatus = NKikimrIndexBuilder::EBuildStatus::BAD_REQUEST) + { + auto ev = std::make_unique<TEvDataShard::TEvBuildFulltextIndexRequest>(); + + auto tabletId = FillRequest(server, sender, ev->Record, setupRequest); + + NKikimr::DoBadRequest<TEvDataShard::TEvBuildFulltextIndexResponse>(server, sender, std::move(ev), tabletId, expectedError, expectedErrorSubstring, expectedStatus); + } + + TString DoBuild(Tests::TServer::TPtr server, TActorId sender, std::function<void(NKikimrTxDataShard::TEvBuildFulltextIndexRequest&)> setupRequest) { + auto ev1 = std::make_unique<TEvDataShard::TEvBuildFulltextIndexRequest>(); + auto tabletId = FillRequest(server, sender, ev1->Record, setupRequest); + + auto ev2 = std::make_unique<TEvDataShard::TEvBuildFulltextIndexRequest>(); + ev2->Record.CopyFrom(ev1->Record); + + auto& runtime = *server->GetRuntime(); + runtime.SendToPipe(tabletId, sender, ev1.release(), 0, GetPipeConfigWithRetries()); + runtime.SendToPipe(tabletId, sender, ev2.release(), 0, GetPipeConfigWithRetries()); + + TAutoPtr<IEventHandle> handle; + auto reply = runtime.GrabEdgeEventRethrow<TEvDataShard::TEvBuildFulltextIndexResponse>(handle); + + UNIT_ASSERT_EQUAL_C(reply->Record.GetStatus(), NKikimrIndexBuilder::EBuildStatus::DONE, reply->Record.ShortDebugString()); + + auto index = ReadShardedTable(server, kIndexTable); + Cerr << "Index:" << Endl; + Cerr << index << Endl; + return std::move(index); + } + + void CreateMainTable(Tests::TServer::TPtr server, TActorId sender) { + TShardedTableOptions options; + options.EnableOutOfOrder(true); + options.Shards(1); + options.AllowSystemColumnNames(false); + options.Columns({ + {"key", "Uint32", true, true}, + {"text", "String", false, false}, + {"data", "String", false, false}, + }); + CreateShardedTable(server, sender, "/Root", "table-main", options); + } + + void FillMainTable(Tests::TServer::TPtr server, TActorId sender) { + ExecSQL(server, sender, R"( + UPSERT INTO `/Root/table-main` (key, text, data) VALUES + (1, "green apple", "one"), + (2, "red apple", "two"), + (3, "yellow apple", "three"), + (4, "red car", "four") + )"); + } + + void CreateIndexTable(Tests::TServer::TPtr server, TActorId sender) { + TShardedTableOptions options; + options.EnableOutOfOrder(true); + options.Shards(1); + options.AllowSystemColumnNames(true); + options.Columns({ + {TokenColumn, NTableIndex::NFulltext::TokenTypeName, true, true}, + {"key", "Uint32", true, true}, + {"data", "String", false, false}, + }); + CreateShardedTable(server, sender, "/Root", "table-index", options); + } + + void Setup(Tests::TServer::TPtr server, TActorId sender) { + server->GetRuntime()->SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_DEBUG); + server->GetRuntime()->SetLogPriority(NKikimrServices::BUILD_INDEX, NLog::PRI_TRACE); + + InitRoot(server, sender); + + CreateMainTable(server, sender); + FillMainTable(server, sender); + CreateIndexTable(server, sender); + } + + Y_UNIT_TEST(BadRequest) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root"); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto sender = server->GetRuntime()->AllocateEdgeActor(); + + Setup(server, sender); + + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.SetTabletId(0); + }, TStringBuilder() << "{ <main>: Error: Wrong shard 0 this is " << GetTableShards(server, sender, kMainTable)[0] << " }"); + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + TPathId(0, 0).ToProto(request.MutablePathId()); + }, "{ <main>: Error: Unknown table id: 0 }"); + + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.SetSnapshotStep(request.GetSnapshotStep() + 1); + }, "Error: Unknown snapshot", true); + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.ClearSnapshotStep(); + }, "{ <main>: Error: Missing snapshot }"); + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.SetSnapshotTxId(request.GetSnapshotTxId() + 1); + }, "Error: Unknown snapshot", true); + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.ClearSnapshotTxId(); + }, "{ <main>: Error: Missing snapshot }"); + + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.clear_settings(); + }, "{ <main>: Error: Missing fulltext index settings }"); + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.MutableSettings()->clear_columns(); + }, "{ <main>: Error: fulltext index should have a single text key column settings but have 0 of them }"); + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.MutableSettings()->mutable_columns()->at(0).mutable_analyzers()->clear_tokenizer(); + }, "{ <main>: Error: tokenizer should be set }"); + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.MutableSettings()->mutable_columns()->at(0).set_column("data"); + }, "{ <main>: Error: fulltext index should have a single text key column text settings but have data }"); + + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.ClearIndexName(); + }, "{ <main>: Error: Empty index table name }"); + + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.ClearKeyColumns(); + }, "{ <main>: Error: fulltext index should have a single text key column but have 0 of them }"); + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.ClearKeyColumns(); + request.AddKeyColumns("some"); + }, "{ <main>: Error: Unknown key column: some }"); + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.AddDataColumns("some"); + }, "{ <main>: Error: Unknown data column: some }"); + + // test multiple issues: + DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { + request.ClearIndexName(); + request.ClearKeyColumns(); + request.AddKeyColumns("some"); + }, "[ { <main>: Error: Empty index table name } { <main>: Error: Unknown key column: some } ]"); + } + + Y_UNIT_TEST(Build) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root"); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto sender = server->GetRuntime()->AllocateEdgeActor(); + + Setup(server, sender); + + auto result = DoBuild(server, sender, [](auto&){}); + + UNIT_ASSERT_VALUES_EQUAL(result, R"(__ydb_token = apple, key = 1, data = (empty maybe) +__ydb_token = apple, key = 2, data = (empty maybe) +__ydb_token = apple, key = 3, data = (empty maybe) +__ydb_token = car, key = 4, data = (empty maybe) +__ydb_token = green, key = 1, data = (empty maybe) +__ydb_token = red, key = 2, data = (empty maybe) +__ydb_token = red, key = 4, data = (empty maybe) +__ydb_token = yellow, key = 3, data = (empty maybe) +)"); + } + + Y_UNIT_TEST(BuildWithData) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root"); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto sender = server->GetRuntime()->AllocateEdgeActor(); + + Setup(server, sender); + + auto result = DoBuild(server, sender, [](auto& request) { + request.AddDataColumns("data"); + }); + + UNIT_ASSERT_VALUES_EQUAL(result, R"(__ydb_token = apple, key = 1, data = one +__ydb_token = apple, key = 2, data = two +__ydb_token = apple, key = 3, data = three +__ydb_token = car, key = 4, data = four +__ydb_token = green, key = 1, data = one +__ydb_token = red, key = 2, data = two +__ydb_token = red, key = 4, data = four +__ydb_token = yellow, key = 3, data = three +)"); + } + + Y_UNIT_TEST(BuildWithTextData) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root"); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto sender = server->GetRuntime()->AllocateEdgeActor(); + + InitRoot(server, sender); + + CreateMainTable(server, sender); + FillMainTable(server, sender); + + { // CreateIndexTable with text column + TShardedTableOptions options; + options.EnableOutOfOrder(true); + options.Shards(1); + options.AllowSystemColumnNames(true); + options.Columns({ + {TokenColumn, NTableIndex::NFulltext::TokenTypeName, true, true}, + {"key", "Uint32", true, true}, + {"text", "String", false, false}, + {"data", "String", false, false}, + }); + CreateShardedTable(server, sender, "/Root", "table-index", options); + } + + auto result = DoBuild(server, sender, [](auto& request) { + request.AddDataColumns("text"); + request.AddDataColumns("data"); + }); + + UNIT_ASSERT_VALUES_EQUAL(result, R"(__ydb_token = apple, key = 1, text = green apple, data = one +__ydb_token = apple, key = 2, text = red apple, data = two +__ydb_token = apple, key = 3, text = yellow apple, data = three +__ydb_token = car, key = 4, text = red car, data = four +__ydb_token = green, key = 1, text = green apple, data = one +__ydb_token = red, key = 2, text = red apple, data = two +__ydb_token = red, key = 4, text = red car, data = four +__ydb_token = yellow, key = 3, text = yellow apple, data = three +)"); + } + + Y_UNIT_TEST(BuildWithTextFromKey) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root"); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto sender = server->GetRuntime()->AllocateEdgeActor(); + + server->GetRuntime()->SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_DEBUG); + server->GetRuntime()->SetLogPriority(NKikimrServices::BUILD_INDEX, NLog::PRI_TRACE); + + InitRoot(server, sender); + + { // CreateMainTable + TShardedTableOptions options; + options.EnableOutOfOrder(true); + options.Shards(1); + options.AllowSystemColumnNames(false); + options.Columns({ + {"key", "Uint32", true, true}, + {"text", "String", true, true}, + {"subkey", "Uint32", true, true}, + {"data", "String", false, false}, + }); + CreateShardedTable(server, sender, "/Root", "table-main", options); + } + { // FillMainTable + ExecSQL(server, sender, R"( + UPSERT INTO `/Root/table-main` (key, text, subkey, data) VALUES + (1, "green apple", 11, "one"), + (2, "red apple", 22, "two"), + (3, "yellow apple", 33, "three"), + (4, "red car", 44, "four") + )"); + } + { // CreateIndexTable + TShardedTableOptions options; + options.EnableOutOfOrder(true); + options.Shards(1); + options.AllowSystemColumnNames(true); + options.Columns({ + {TokenColumn, NTableIndex::NFulltext::TokenTypeName, true, true}, + {"key", "Uint32", true, true}, + {"text", "String", true, true}, + {"subkey", "Uint32", true, true}, + {"data", "String", false, false}, + }); + CreateShardedTable(server, sender, "/Root", "table-index", options); + } + + auto result = DoBuild(server, sender, [](auto& request) { + request.AddDataColumns("data"); + }); + + UNIT_ASSERT_VALUES_EQUAL(result, R"(__ydb_token = apple, key = 1, text = green apple, subkey = 11, data = one +__ydb_token = apple, key = 2, text = red apple, subkey = 22, data = two +__ydb_token = apple, key = 3, text = yellow apple, subkey = 33, data = three +__ydb_token = car, key = 4, text = red car, subkey = 44, data = four +__ydb_token = green, key = 1, text = green apple, subkey = 11, data = one +__ydb_token = red, key = 2, text = red apple, subkey = 22, data = two +__ydb_token = red, key = 4, text = red car, subkey = 44, data = four +__ydb_token = yellow, key = 3, text = yellow apple, subkey = 33, data = three +)"); + } +} + +} diff --git a/ydb/core/tx/datashard/build_index/ut/ya.make b/ydb/core/tx/datashard/build_index/ut/ya.make index 8da7981617c..b71af3fef00 100644 --- a/ydb/core/tx/datashard/build_index/ut/ya.make +++ b/ydb/core/tx/datashard/build_index/ut/ya.make @@ -27,6 +27,7 @@ PEERDIR( YQL_LAST_ABI_VERSION() SRCS( + ut_fulltext.cpp ut_local_kmeans.cpp ut_prefix_kmeans.cpp ut_recompute_kmeans.cpp diff --git a/ydb/core/tx/datashard/datashard.h b/ydb/core/tx/datashard/datashard.h index 47becb18023..dabc4175395 100644 --- a/ydb/core/tx/datashard/datashard.h +++ b/ydb/core/tx/datashard/datashard.h @@ -367,6 +367,9 @@ namespace TEvDataShard { EvIncrementalRestoreResponse, + EvBuildFulltextIndexRequest, + EvBuildFulltextIndexResponse, + EvEnd }; @@ -1560,6 +1563,18 @@ namespace TEvDataShard { TEvDataShard::EvPrefixKMeansResponse> { }; + struct TEvBuildFulltextIndexRequest + : public TEventPB<TEvBuildFulltextIndexRequest, + NKikimrTxDataShard::TEvBuildFulltextIndexRequest, + TEvDataShard::EvBuildFulltextIndexRequest> { + }; + + struct TEvBuildFulltextIndexResponse + : public TEventPB<TEvBuildFulltextIndexResponse, + NKikimrTxDataShard::TEvBuildFulltextIndexResponse, + TEvDataShard::EvBuildFulltextIndexResponse> { + }; + struct TEvIncrementalRestoreResponse : public TEventPB<TEvIncrementalRestoreResponse, NKikimrTxDataShard::TEvIncrementalRestoreResponse, diff --git a/ydb/core/tx/datashard/datashard_impl.h b/ydb/core/tx/datashard/datashard_impl.h index 52f48f77cd6..29a967d6aaf 100644 --- a/ydb/core/tx/datashard/datashard_impl.h +++ b/ydb/core/tx/datashard/datashard_impl.h @@ -241,6 +241,7 @@ class TDataShard class TTxHandleSafeReshuffleKMeansScan; class TTxHandleSafeRecomputeKMeansScan; class TTxHandleSafeStatisticsScan; + class TTxHandleSafeBuildFulltextIndexScan; class TTxMediatorStateRestored; @@ -1342,6 +1343,8 @@ class TDataShard void HandleSafe(TEvDataShard::TEvLocalKMeansRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvDataShard::TEvPrefixKMeansRequest::TPtr& ev, const TActorContext& ctx); void HandleSafe(TEvDataShard::TEvPrefixKMeansRequest::TPtr& ev, const TActorContext& ctx); + void Handle(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev, const TActorContext& ctx); + void HandleSafe(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvDataShard::TEvCdcStreamScanRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvCdcStreamScanRegistered::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvCdcStreamScanProgress::TPtr& ev, const TActorContext& ctx); @@ -3225,6 +3228,7 @@ protected: HFunc(TEvDataShard::TEvRecomputeKMeansRequest, Handle); HFunc(TEvDataShard::TEvLocalKMeansRequest, Handle); HFunc(TEvDataShard::TEvPrefixKMeansRequest, Handle); + HFunc(TEvDataShard::TEvBuildFulltextIndexRequest, Handle); HFunc(TEvDataShard::TEvCdcStreamScanRequest, Handle); HFunc(TEvPrivate::TEvCdcStreamScanRegistered, Handle); HFunc(TEvPrivate::TEvCdcStreamScanProgress, Handle); diff --git a/ydb/core/tx/datashard/ya.make b/ydb/core/tx/datashard/ya.make index c210e96e4f9..ee92b38d455 100644 --- a/ydb/core/tx/datashard/ya.make +++ b/ydb/core/tx/datashard/ya.make @@ -218,13 +218,14 @@ SRCS( wait_for_plan_unit.cpp wait_for_stream_clearance_unit.cpp - build_index/prefix_kmeans.cpp + build_index/fulltext.cpp build_index/kmeans_helper.cpp build_index/local_kmeans.cpp - build_index/sample_k.cpp - build_index/secondary_index.cpp + build_index/prefix_kmeans.cpp build_index/recompute_kmeans.cpp build_index/reshuffle_kmeans.cpp + build_index/sample_k.cpp + build_index/secondary_index.cpp build_index/unique_index.cpp ) diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp index fc648c9dda8..4ee836c4c14 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp @@ -153,7 +153,7 @@ TVector<ISubOperation::TPtr> CreateIndexedTable(TOperationId nextId, const TTxTr return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Fulltext index support is disabled")}; } TString msg; - if (!NKikimr::NFulltext::ValidateSettings(indexDescription.GetFulltextIndexDescription().GetSettings(), msg)) { + if (!NKikimr::NFulltext::ValidateSettings(indexDescription.keycolumnnames(), indexDescription.GetFulltextIndexDescription().GetSettings(), msg)) { return {CreateReject(nextId, NKikimrScheme::EStatus::StatusInvalidParameter, msg)}; } break; diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index 0b83aae3012..6dc406e0552 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -282,7 +282,7 @@ private: buildInfo.IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalFulltext; NKikimrSchemeOp::TFulltextIndexDescription fulltextIndexDescription; *fulltextIndexDescription.MutableSettings() = index.global_fulltext_index().fulltext_settings(); - if (!NKikimr::NFulltext::ValidateSettings(fulltextIndexDescription.GetSettings(), explain)) { + if (!NKikimr::NFulltext::ValidateSettings(index.index_columns(), fulltextIndexDescription.GetSettings(), explain)) { return false; } buildInfo.SpecializedIndexDescription = fulltextIndexDescription; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index 9b089450927..ad99fca8054 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -176,13 +176,13 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat return false; } - const TString& indexColumnName = indexKeys.KeyColumns.back(); - Y_ABORT_UNLESS(baseColumnTypes.contains(indexColumnName)); - auto typeInfo = baseColumnTypes.at(indexColumnName); + const TString& embeddingColumnName = indexKeys.KeyColumns.back(); + Y_ABORT_UNLESS(baseColumnTypes.contains(embeddingColumnName)); + auto typeInfo = baseColumnTypes.at(embeddingColumnName); if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) { status = NKikimrScheme::EStatus::StatusInvalidParameter; - error = TStringBuilder() << "Index column '" << indexColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); + error = TStringBuilder() << "Embedding column '" << embeddingColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); return false; } break; @@ -193,30 +193,18 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat if (indexKeys.KeyColumns.size() > 1) { status = NKikimrScheme::EStatus::StatusInvalidParameter; - error = TStringBuilder() << "fulltext index can only have a single key text column"; + error = TStringBuilder() << "fulltext index should have a single text key column"; return false; } - if (indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().size() != 1) { - status = NKikimrScheme::EStatus::StatusInvalidParameter; - error = TStringBuilder() << "fulltext index should have single '" << indexKeys.KeyColumns.at(0) << "' column settings" - << " but have " << indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().size() << " of them"; - return false; - } - if (indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().at(0).Getcolumn() != indexKeys.KeyColumns.at(0)) { - status = NKikimrScheme::EStatus::StatusInvalidParameter; - error = TStringBuilder() << "fulltext index should have '" << indexKeys.KeyColumns.at(0) << "' column settings" - << " but have '" << indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().at(0).Getcolumn() << "' column settings"; - return false; - } - - const TString& indexColumnName = indexKeys.KeyColumns.back(); - Y_ABORT_UNLESS(baseColumnTypes.contains(indexColumnName)); - auto typeInfo = baseColumnTypes.at(indexColumnName); + + const TString& textColumnName = indexKeys.KeyColumns.at(0); + Y_ABORT_UNLESS(baseColumnTypes.contains(textColumnName)); + auto typeInfo = baseColumnTypes.at(textColumnName); // TODO: support utf-8 in fulltext index if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) { status = NKikimrScheme::EStatus::StatusInvalidParameter; - error = TStringBuilder() << "Index column '" << indexColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); + error = TStringBuilder() << "Text column '" << textColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); return false; } break; diff --git a/ydb/library/services/services.proto b/ydb/library/services/services.proto index 93c74186a9c..3941e270baa 100644 --- a/ydb/library/services/services.proto +++ b/ydb/library/services/services.proto @@ -1119,5 +1119,6 @@ message TActivity { SCHEME_BOARD_RESTORE_ACTOR = 669; REPLICATION_CONTROLLER_RESOURCE_ID_RESOLVER = 670; BS_VDISK_METADATA_ACTOR = 671; + BUILD_FULLTEXT_INDEX = 672; }; }; |