aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author yumkam <yumkam7@ydb.tech> 2024-06-21 12:30:29 +0300
committer GitHub <noreply@github.com> 2024-06-21 11:30:29 +0200
commit 511bb2768292937c805b404f0ffc231a4dc04195 (patch)
tree 0bed765180e34bd735d7897e0080bf7a33fb2c5d
parent 1ef5e49a2cda19be7c36ac3afb8eba5c853119d1 (diff)
download ydb-511bb2768292937c805b404f0ffc231a4dc04195.tar.gz
grace_join: fix mixup ("","foo") == ("foo", "") in string key matches (#5721)
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp 19
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp 6
2 files changed, 19 insertions, 6 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp
index 481e6df1cd..8c6a43258c 100644
--- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp
+++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp
@@ -33,6 +33,9 @@ void TTable::AddTuple( ui64 * intColumns, char ** stringColumns, ui32 * strings
// Processing variable length string columns
if ( NumberOfKeyStringColumns != 0 || NumberOfKeyIColumns != 0) {
+ totalBytesForStrings += sizeof(ui32)*NumberOfKeyStringColumns;
+ totalBytesForStrings += sizeof(ui32)*NumberOfKeyIColumns;
+
for( ui64 i = 0; i < NumberOfKeyStringColumns; i++ ) {
totalBytesForStrings += stringsSizes[i];
}
@@ -55,11 +58,15 @@ void TTable::AddTuple( ui64 * intColumns, char ** stringColumns, ui32 * strings
char * currStrPtr = reinterpret_cast< char* > (startPtr);
for( ui64 i = 0; i < NumberOfKeyStringColumns; i++) {
+ WriteUnaligned<ui32>(currStrPtr, stringsSizes[i] );
+ currStrPtr+=sizeof(ui32);
std::memcpy(currStrPtr, stringColumns[i], stringsSizes[i] );
currStrPtr+=stringsSizes[i];
}
for( ui64 i = 0; i < NumberOfKeyIColumns; i++) {
+ WriteUnaligned<ui32>(currStrPtr, IColumnsVals[i].size() );
+ currStrPtr+=sizeof(ui32);
std::memcpy(currStrPtr, IColumnsVals[i].data(), IColumnsVals[i].size() );
currStrPtr+=IColumnsVals[i].size();
}
@@ -214,13 +221,15 @@ inline bool CompareIColumns( const ui32* stringSizes1, const char * vals1,
for (ui32 i = 0; i < nStringColumns; i ++) {
currSize1 = *(stringSizes1 + i);
currSize2 = *(stringSizes2 + i);
- currOffset1 += currSize1;
- currOffset2 += currSize2;
+ currOffset1 += currSize1 + sizeof(ui32);
+ currOffset2 += currSize2 + sizeof(ui32);
}
for (ui32 i = 0; i < nIColumns; i ++) {
currSize1 = *(stringSizes1 + nStringColumns + i );
currSize2 = *(stringSizes2 + nStringColumns + i );
+ currOffset1 += sizeof(ui32);
+ currOffset2 += sizeof(ui32);
str1 = TStringBuf(vals1 + currOffset1, currSize1);
val1 = (colInterfaces + i)->Packer->Unpack(str1, colInterfaces->HolderFactory);
str2 = TStringBuf(vals2 + currOffset2, currSize2 );
@@ -599,13 +608,17 @@ inline void TTable::GetTupleData(ui32 bucketNum, ui32 tupleId, TupleData & td) {
for (ui64 i = 0; i < NumberOfKeyStringColumns; ++i)
{
- td.StrColumns[i] = strPtr;
td.StrSizes[i] = tb.StringsOffsets[stringsOffsetsIdx + 2 + i];
+ Y_ENSURE(ReadUnaligned<ui32>(strPtr) == td.StrSizes[i]);
+ strPtr += sizeof(ui32);
+ td.StrColumns[i] = strPtr;
strPtr += td.StrSizes[i];
}
for ( ui64 i = 0; i < NumberOfKeyIColumns; i++) {
ui32 currSize = tb.StringsOffsets[stringsOffsetsIdx + 2 + NumberOfKeyStringColumns + i];
+ Y_ENSURE(ReadUnaligned<ui32>(strPtr) == currSize);
+ strPtr += sizeof(ui32);
*(td.IColumns + i) = (ColInterfaces + i)->Packer->Unpack(TStringBuf(strPtr, currSize), ColInterfaces->HolderFactory);
strPtr += currSize;
}
diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp
index 1884f5e4b5..647605bda0 100644
--- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp
+++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp
@@ -1121,14 +1121,14 @@ Y_UNIT_TEST_SUITE(TMiniKQLGraceJoinTest) {
NUdf::TUnboxedValue tuple;
UNIT_ASSERT(iterator.Next(tuple));
+ UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
+ UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
+ UNIT_ASSERT(iterator.Next(tuple));
UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
UNIT_ASSERT(iterator.Next(tuple));
UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Z");
- UNIT_ASSERT(iterator.Next(tuple));
- UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
- UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
UNIT_ASSERT(!iterator.Next(tuple));
}