diff options
author | yumkam <yumkam7@ydb.tech> | 2024-06-21 12:30:29 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-21 11:30:29 +0200 |
commit | 511bb2768292937c805b404f0ffc231a4dc04195 (patch) | |
tree | 0bed765180e34bd735d7897e0080bf7a33fb2c5d | |
parent | 1ef5e49a2cda19be7c36ac3afb8eba5c853119d1 (diff) | |
download | ydb-511bb2768292937c805b404f0ffc231a4dc04195.tar.gz |
grace_join: fix mixup ("","foo") == ("foo", "") in string key matches (#5721)
-rw-r--r-- | ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp | 19 | ||||
-rw-r--r-- | ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp | 6 |
2 files changed, 19 insertions, 6 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp index 481e6df1cd..8c6a43258c 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp @@ -33,6 +33,9 @@ void TTable::AddTuple( ui64 * intColumns, char ** stringColumns, ui32 * strings // Processing variable length string columns if ( NumberOfKeyStringColumns != 0 || NumberOfKeyIColumns != 0) { + totalBytesForStrings += sizeof(ui32)*NumberOfKeyStringColumns; + totalBytesForStrings += sizeof(ui32)*NumberOfKeyIColumns; + for( ui64 i = 0; i < NumberOfKeyStringColumns; i++ ) { totalBytesForStrings += stringsSizes[i]; } @@ -55,11 +58,15 @@ void TTable::AddTuple( ui64 * intColumns, char ** stringColumns, ui32 * strings char * currStrPtr = reinterpret_cast< char* > (startPtr); for( ui64 i = 0; i < NumberOfKeyStringColumns; i++) { + WriteUnaligned<ui32>(currStrPtr, stringsSizes[i] ); + currStrPtr+=sizeof(ui32); std::memcpy(currStrPtr, stringColumns[i], stringsSizes[i] ); currStrPtr+=stringsSizes[i]; } for( ui64 i = 0; i < NumberOfKeyIColumns; i++) { + WriteUnaligned<ui32>(currStrPtr, IColumnsVals[i].size() ); + currStrPtr+=sizeof(ui32); std::memcpy(currStrPtr, IColumnsVals[i].data(), IColumnsVals[i].size() ); currStrPtr+=IColumnsVals[i].size(); } @@ -214,13 +221,15 @@ inline bool CompareIColumns( const ui32* stringSizes1, const char * vals1, for (ui32 i = 0; i < nStringColumns; i ++) { currSize1 = *(stringSizes1 + i); currSize2 = *(stringSizes2 + i); - currOffset1 += currSize1; - currOffset2 += currSize2; + currOffset1 += currSize1 + sizeof(ui32); + currOffset2 += currSize2 + sizeof(ui32); } for (ui32 i = 0; i < nIColumns; i ++) { currSize1 = *(stringSizes1 + nStringColumns + i ); currSize2 = *(stringSizes2 + nStringColumns + i ); + currOffset1 += sizeof(ui32); + currOffset2 += sizeof(ui32); str1 = TStringBuf(vals1 + currOffset1, currSize1); val1 = (colInterfaces + i)->Packer->Unpack(str1, colInterfaces->HolderFactory); str2 = TStringBuf(vals2 + currOffset2, currSize2 ); @@ -599,13 +608,17 @@ inline void TTable::GetTupleData(ui32 bucketNum, ui32 tupleId, TupleData & td) { for (ui64 i = 0; i < NumberOfKeyStringColumns; ++i) { - td.StrColumns[i] = strPtr; td.StrSizes[i] = tb.StringsOffsets[stringsOffsetsIdx + 2 + i]; + Y_ENSURE(ReadUnaligned<ui32>(strPtr) == td.StrSizes[i]); + strPtr += sizeof(ui32); + td.StrColumns[i] = strPtr; strPtr += td.StrSizes[i]; } for ( ui64 i = 0; i < NumberOfKeyIColumns; i++) { ui32 currSize = tb.StringsOffsets[stringsOffsetsIdx + 2 + NumberOfKeyStringColumns + i]; + Y_ENSURE(ReadUnaligned<ui32>(strPtr) == currSize); + strPtr += sizeof(ui32); *(td.IColumns + i) = (ColInterfaces + i)->Packer->Unpack(TStringBuf(strPtr, currSize), ColInterfaces->HolderFactory); strPtr += currSize; } diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp index 1884f5e4b5..647605bda0 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp @@ -1121,14 +1121,14 @@ Y_UNIT_TEST_SUITE(TMiniKQLGraceJoinTest) { NUdf::TUnboxedValue tuple; UNIT_ASSERT(iterator.Next(tuple)); + UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B"); + UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X"); + UNIT_ASSERT(iterator.Next(tuple)); UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C"); UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y"); UNIT_ASSERT(iterator.Next(tuple)); UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C"); UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Z"); - UNIT_ASSERT(iterator.Next(tuple)); - UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B"); - UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X"); UNIT_ASSERT(!iterator.Next(tuple)); } |