diff options
author | mrlolthe1st <[email protected]> | 2023-08-10 11:03:24 +0300 |
---|---|---|
committer | mrlolthe1st <[email protected]> | 2023-08-10 11:50:39 +0300 |
commit | 19649bbc29600a1b4b7401da9562c506f765b20f (patch) | |
tree | 15645bd311c613de7ca61003a38e28c0769fbe8d | |
parent | 28701169b2d4ec7688af3de345c99333bb433546 (diff) |
YQL-15949: Support skip of complex types
YQL-15949: Support skip of complex types
13 files changed, 426 insertions, 102 deletions
diff --git a/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp b/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp index f4a2f3fc28a..254a7a1e35b 100644 --- a/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp +++ b/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp @@ -2285,6 +2285,38 @@ NUdf::TUnboxedValue ReadYsonValuePg(TPgType* type, char cmd, TInputBuf& buf) { return PgValueFromString(s, type->GetTypeId()); } +void SkipSkiffPg(TPgType* type, NCommon::TInputBuf& buf) { + auto marker = buf.Read(); + if (!marker) { + return; + } + + switch (type->GetTypeId()) { + case BOOLOID: { + buf.Read(); + return; + } + case INT2OID: + case INT4OID: + case INT8OID: { + buf.SkipMany(sizeof(i64)); + return; + } + case FLOAT4OID: + case FLOAT8OID: { + buf.SkipMany(sizeof(double)); + return; + } + default: { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + buf.SkipMany(size); + return; + } + } +} + NUdf::TUnboxedValue ReadSkiffPg(TPgType* type, NCommon::TInputBuf& buf) { auto marker = buf.Read(); if (!marker) { diff --git a/ydb/library/yql/parser/pg_wrapper/interface/codec.h b/ydb/library/yql/parser/pg_wrapper/interface/codec.h index 979f4915041..f672a8cad1d 100644 --- a/ydb/library/yql/parser/pg_wrapper/interface/codec.h +++ b/ydb/library/yql/parser/pg_wrapper/interface/codec.h @@ -39,6 +39,8 @@ void WriteYsonValueInTableFormatPg(TOutputBuf& buf, NKikimr::NMiniKQL::TPgType* NUdf::TUnboxedValue ReadYsonValueInTableFormatPg(NKikimr::NMiniKQL::TPgType* type, char cmd, TInputBuf& buf); NUdf::TUnboxedValue ReadYsonValuePg(NKikimr::NMiniKQL::TPgType* type, char cmd, TInputBuf& buf); +void SkipSkiffPg(NKikimr::NMiniKQL::TPgType* type, TInputBuf& buf); + NKikimr::NUdf::TUnboxedValue ReadSkiffPg(NKikimr::NMiniKQL::TPgType* type, TInputBuf& buf); void WriteSkiffPg(NKikimr::NMiniKQL::TPgType* type, const NKikimr::NUdf::TUnboxedValuePod& value, TOutputBuf& buf); diff --git a/ydb/library/yql/providers/common/codec/yql_codec.cpp b/ydb/library/yql/providers/common/codec/yql_codec.cpp index 2d135d002f5..a404e27f9f0 100644 --- a/ydb/library/yql/providers/common/codec/yql_codec.cpp +++ b/ydb/library/yql/providers/common/codec/yql_codec.cpp @@ -753,10 +753,14 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, CHECK_EXPECTED(cmd, BeginListSymbol); cmd = buf.Read(); - ui64 index = 0; + i64 index = 0; if (isTableFormat) { - CHECK_EXPECTED(cmd, Uint64Marker); - index = buf.ReadVarUI64(); + YQL_ENSURE(cmd == Int64Marker || cmd == Uint64Marker); + if (cmd == Uint64Marker) { + index = buf.ReadVarUI64(); + } else { + index = buf.ReadVarI64(); + } } else { if (cmd == BeginListSymbol) { cmd = buf.Read(); @@ -1021,8 +1025,7 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, CHECK_EXPECTED(cmd, EndListSymbol); return ret; - } - else { + } else { cmd = buf.Read(); for (;;) { @@ -1524,6 +1527,176 @@ NUdf::TUnboxedValue ReadSkiffData(TType* type, ui64 nativeYtTypeFlags, TInputBuf } } +void SkipSkiffField(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, TInputBuf& buf) { + const bool isOptional = type->IsOptional(); + if (isOptional) { + // Unwrap optional + type = static_cast<TOptionalType*>(type)->GetItemType(); + } + + if (isOptional) { + auto marker = buf.Read(); + if (!marker) { + return; + } + } + + if (type->IsData()) { + auto schemeType = static_cast<TDataType*>(type)->GetSchemeType(); + switch (schemeType) { + case NUdf::TDataType<bool>::Id: + buf.SkipMany(sizeof(ui8)); + break; + + case NUdf::TDataType<ui8>::Id: + case NUdf::TDataType<ui16>::Id: + case NUdf::TDataType<ui32>::Id: + case NUdf::TDataType<ui64>::Id: + case NUdf::TDataType<NUdf::TDate>::Id: + case NUdf::TDataType<NUdf::TDatetime>::Id: + case NUdf::TDataType<NUdf::TTimestamp>::Id: + buf.SkipMany(sizeof(ui64)); + break; + + case NUdf::TDataType<i8>::Id: + case NUdf::TDataType<i16>::Id: + case NUdf::TDataType<i32>::Id: + case NUdf::TDataType<i64>::Id: + case NUdf::TDataType<NUdf::TInterval>::Id: + buf.SkipMany(sizeof(i64)); + break; + + case NUdf::TDataType<float>::Id: + case NUdf::TDataType<double>::Id: + buf.SkipMany(sizeof(double)); + break; + + case NUdf::TDataType<NUdf::TUtf8>::Id: + case NUdf::TDataType<char*>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TYson>::Id: + case NUdf::TDataType<NUdf::TUuid>::Id: + case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TTzDate>::Id: + case NUdf::TDataType<NUdf::TTzDatetime>::Id: + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + buf.SkipMany(size); + break; + } + case NUdf::TDataType<NUdf::TDecimal>::Id: { + if (nativeYtTypeFlags & NTCF_DECIMAL) { + auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); + if (params.first < 10) { + buf.SkipMany(sizeof(i32)); + } else if (params.first < 19) { + buf.SkipMany(sizeof(i64)); + } else { + buf.SkipMany(sizeof(NDecimal::TInt128)); + } + } else { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + buf.SkipMany(size); + } + break; + } + default: + YQL_ENSURE(false, "Unsupported data type: " << schemeType); + } + return; + } + + if (type->IsPg()) { + SkipSkiffPg(static_cast<TPgType*>(type), buf); + return; + } + + if (type->IsStruct()) { + auto structType = static_cast<TStructType*>(type); + const std::vector<size_t>* reorder = nullptr; + if (auto cookie = structType->GetCookie()) { + reorder = ((const std::vector<size_t>*)cookie); + } + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + SkipSkiffField(structType->GetMemberType(reorder ? reorder->at(i) : i), nativeYtTypeFlags, buf); + } + return; + } + + if (type->IsList()) { + auto itemType = static_cast<TListType*>(type)->GetItemType(); + while (buf.Read() == '\0') { + SkipSkiffField(itemType, nativeYtTypeFlags, buf); + } + return; + } + + if (type->IsTuple()) { + auto tupleType = static_cast<TTupleType*>(type); + + for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { + SkipSkiffField(tupleType->GetElementType(i), nativeYtTypeFlags, buf); + } + return; + } + + if (type->IsVariant()) { + auto varType = AS_TYPE(TVariantType, type); + ui16 data = 0; + if (varType->GetAlternativesCount() < 256) { + buf.ReadMany((char*)&data, 1); + } else { + buf.ReadMany((char*)&data, sizeof(data)); + } + + if (varType->GetUnderlyingType()->IsTuple()) { + auto tupleType = AS_TYPE(TTupleType, varType->GetUnderlyingType()); + YQL_ENSURE(data < tupleType->GetElementsCount()); + SkipSkiffField(tupleType->GetElementType(data), nativeYtTypeFlags, buf); + } else { + auto structType = AS_TYPE(TStructType, varType->GetUnderlyingType()); + if (auto cookie = structType->GetCookie()) { + const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); + data = reorder[data]; + } + YQL_ENSURE(data < structType->GetMembersCount()); + + SkipSkiffField(structType->GetMemberType(data), nativeYtTypeFlags, buf); + } + return; + } + + if (type->IsVoid()) { + return; + } + + if (type->IsNull()) { + return; + } + + if (type->IsEmptyList() || type->IsEmptyDict()) { + return; + } + + if (type->IsDict()) { + auto dictType = AS_TYPE(TDictType, type); + auto keyType = dictType->GetKeyType(); + auto payloadType = dictType->GetPayloadType(); + while (buf.Read() == '\0') { + SkipSkiffField(keyType, nativeYtTypeFlags, buf); + SkipSkiffField(payloadType, nativeYtTypeFlags, buf); + } + return; + } + + YQL_ENSURE(false, "Unsupported type for skip: " << type->GetKindAsStr()); +} + NKikimr::NUdf::TUnboxedValue ReadSkiffNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NMiniKQL::THolderFactory& holderFactory, TInputBuf& buf) { diff --git a/ydb/library/yql/providers/common/codec/yql_codec.h b/ydb/library/yql/providers/common/codec/yql_codec.h index 39a95196c3c..351c2e3322b 100644 --- a/ydb/library/yql/providers/common/codec/yql_codec.h +++ b/ydb/library/yql/providers/common/codec/yql_codec.h @@ -77,6 +77,8 @@ extern "C" void ReadYsonContainerValue(NKikimr::NMiniKQL::TType* type, const NKikimr::NMiniKQL::THolderFactory& holderFactory, NKikimr::NUdf::TUnboxedValue& value, NCommon::TInputBuf& buf, bool wrapOptional); +void SkipSkiffField(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, TInputBuf& buf); + NKikimr::NUdf::TUnboxedValue ReadSkiffNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NMiniKQL::THolderFactory& holderFactory, TInputBuf& buf); diff --git a/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.darwin-x86_64.txt index 0105a602cfa..a96cbb28b36 100644 --- a/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.darwin-x86_64.txt @@ -76,7 +76,7 @@ add_custom_command( -globalopt -globaldce -internalize - -internalize-public-api-list=WriteJust#WriteNothing#WriteBool#Write8#Write16#Write32#Write64#Write120#WriteDecimal32#WriteDecimal64#WriteDecimal128#WriteFloat#WriteDouble#WriteString#ReadBool#ReadInt8#ReadUint8#ReadInt16#ReadUint16#ReadInt32#ReadUint32#ReadInt64#ReadUint64#ReadInt120#ReadDecimal32#ReadDecimal64#ReadDecimal128#ReadFloat#ReadDouble#ReadOptional#SkipFixedData#SkipVarData#ReadTzDate#ReadTzDatetime#ReadTzTimestamp#WriteTzDate#WriteTzDatetime#WriteTzTimestamp#GetWrittenBytes#FillZero + -internalize-public-api-list=WriteJust#WriteNothing#WriteBool#Write8#Write16#Write32#Write64#Write120#WriteDecimal32#WriteDecimal64#WriteDecimal128#WriteFloat#WriteDouble#WriteString#ReadBool#ReadInt8#ReadUint8#ReadInt16#ReadUint16#ReadInt32#ReadUint32#ReadInt64#ReadUint64#ReadInt120#ReadDecimal32#ReadDecimal64#ReadDecimal128#ReadFloat#ReadDouble#ReadOptional#ReadVariantData#SkipFixedData#SkipVarData#ReadTzDate#ReadTzDatetime#ReadTzTimestamp#WriteTzDate#WriteTzDatetime#WriteTzTimestamp#GetWrittenBytes#FillZero ) add_custom_command( OUTPUT diff --git a/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.linux-aarch64.txt b/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.linux-aarch64.txt index fba2ebc670b..171d66fff4b 100644 --- a/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.linux-aarch64.txt @@ -78,7 +78,7 @@ add_custom_command( -globalopt -globaldce -internalize - -internalize-public-api-list=WriteJust#WriteNothing#WriteBool#Write8#Write16#Write32#Write64#Write120#WriteDecimal32#WriteDecimal64#WriteDecimal128#WriteFloat#WriteDouble#WriteString#ReadBool#ReadInt8#ReadUint8#ReadInt16#ReadUint16#ReadInt32#ReadUint32#ReadInt64#ReadUint64#ReadInt120#ReadDecimal32#ReadDecimal64#ReadDecimal128#ReadFloat#ReadDouble#ReadOptional#SkipFixedData#SkipVarData#ReadTzDate#ReadTzDatetime#ReadTzTimestamp#WriteTzDate#WriteTzDatetime#WriteTzTimestamp#GetWrittenBytes#FillZero + -internalize-public-api-list=WriteJust#WriteNothing#WriteBool#Write8#Write16#Write32#Write64#Write120#WriteDecimal32#WriteDecimal64#WriteDecimal128#WriteFloat#WriteDouble#WriteString#ReadBool#ReadInt8#ReadUint8#ReadInt16#ReadUint16#ReadInt32#ReadUint32#ReadInt64#ReadUint64#ReadInt120#ReadDecimal32#ReadDecimal64#ReadDecimal128#ReadFloat#ReadDouble#ReadOptional#ReadVariantData#SkipFixedData#SkipVarData#ReadTzDate#ReadTzDatetime#ReadTzTimestamp#WriteTzDate#WriteTzDatetime#WriteTzTimestamp#GetWrittenBytes#FillZero ) add_custom_command( OUTPUT diff --git a/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.linux-x86_64.txt b/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.linux-x86_64.txt index fba2ebc670b..171d66fff4b 100644 --- a/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.linux-x86_64.txt @@ -78,7 +78,7 @@ add_custom_command( -globalopt -globaldce -internalize - -internalize-public-api-list=WriteJust#WriteNothing#WriteBool#Write8#Write16#Write32#Write64#Write120#WriteDecimal32#WriteDecimal64#WriteDecimal128#WriteFloat#WriteDouble#WriteString#ReadBool#ReadInt8#ReadUint8#ReadInt16#ReadUint16#ReadInt32#ReadUint32#ReadInt64#ReadUint64#ReadInt120#ReadDecimal32#ReadDecimal64#ReadDecimal128#ReadFloat#ReadDouble#ReadOptional#SkipFixedData#SkipVarData#ReadTzDate#ReadTzDatetime#ReadTzTimestamp#WriteTzDate#WriteTzDatetime#WriteTzTimestamp#GetWrittenBytes#FillZero + -internalize-public-api-list=WriteJust#WriteNothing#WriteBool#Write8#Write16#Write32#Write64#Write120#WriteDecimal32#WriteDecimal64#WriteDecimal128#WriteFloat#WriteDouble#WriteString#ReadBool#ReadInt8#ReadUint8#ReadInt16#ReadUint16#ReadInt32#ReadUint32#ReadInt64#ReadUint64#ReadInt120#ReadDecimal32#ReadDecimal64#ReadDecimal128#ReadFloat#ReadDouble#ReadOptional#ReadVariantData#SkipFixedData#SkipVarData#ReadTzDate#ReadTzDatetime#ReadTzTimestamp#WriteTzDate#WriteTzDatetime#WriteTzTimestamp#GetWrittenBytes#FillZero ) add_custom_command( OUTPUT diff --git a/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.windows-x86_64.txt b/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.windows-x86_64.txt index 0105a602cfa..a96cbb28b36 100644 --- a/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/providers/yt/codec/codegen/CMakeLists.windows-x86_64.txt @@ -76,7 +76,7 @@ add_custom_command( -globalopt -globaldce -internalize - -internalize-public-api-list=WriteJust#WriteNothing#WriteBool#Write8#Write16#Write32#Write64#Write120#WriteDecimal32#WriteDecimal64#WriteDecimal128#WriteFloat#WriteDouble#WriteString#ReadBool#ReadInt8#ReadUint8#ReadInt16#ReadUint16#ReadInt32#ReadUint32#ReadInt64#ReadUint64#ReadInt120#ReadDecimal32#ReadDecimal64#ReadDecimal128#ReadFloat#ReadDouble#ReadOptional#SkipFixedData#SkipVarData#ReadTzDate#ReadTzDatetime#ReadTzTimestamp#WriteTzDate#WriteTzDatetime#WriteTzTimestamp#GetWrittenBytes#FillZero + -internalize-public-api-list=WriteJust#WriteNothing#WriteBool#Write8#Write16#Write32#Write64#Write120#WriteDecimal32#WriteDecimal64#WriteDecimal128#WriteFloat#WriteDouble#WriteString#ReadBool#ReadInt8#ReadUint8#ReadInt16#ReadUint16#ReadInt32#ReadUint32#ReadInt64#ReadUint64#ReadInt120#ReadDecimal32#ReadDecimal64#ReadDecimal128#ReadFloat#ReadDouble#ReadOptional#ReadVariantData#SkipFixedData#SkipVarData#ReadTzDate#ReadTzDatetime#ReadTzTimestamp#WriteTzDate#WriteTzDatetime#WriteTzTimestamp#GetWrittenBytes#FillZero ) add_custom_command( OUTPUT diff --git a/ydb/library/yql/providers/yt/codec/codegen/ya.make b/ydb/library/yql/providers/yt/codec/codegen/ya.make index 79b07b27260..422511b8dae 100644 --- a/ydb/library/yql/providers/yt/codec/codegen/ya.make +++ b/ydb/library/yql/providers/yt/codec/codegen/ya.make @@ -59,6 +59,7 @@ IF (NOT MKQL_DISABLE_CODEGEN) ReadFloat ReadDouble ReadOptional + ReadVariantData SkipFixedData SkipVarData ReadTzDate diff --git a/ydb/library/yql/providers/yt/codec/codegen/yt_codec_bc.cpp b/ydb/library/yql/providers/yt/codec/codegen/yt_codec_bc.cpp index c0deefdcd4d..fd7efec8e1c 100644 --- a/ydb/library/yql/providers/yt/codec/codegen/yt_codec_bc.cpp +++ b/ydb/library/yql/providers/yt/codec/codegen/yt_codec_bc.cpp @@ -205,6 +205,17 @@ extern "C" ui8 ReadOptional(void* vbuf) { return buf.Read(); } +extern "C" ui16 ReadVariantData(void* vbuf, ui8 oneByte) { + NCommon::TInputBuf& buf = *(NCommon::TInputBuf*)vbuf; + if (oneByte) { + return buf.Read(); + } else { + ui16 data = 0; + buf.ReadMany((char*)&data, sizeof(data)); + return data; + } +} + extern "C" void SkipFixedData(void* vbuf, ui64 size) { NCommon::TInputBuf& buf = *(NCommon::TInputBuf*)vbuf; buf.SkipMany(size); diff --git a/ydb/library/yql/providers/yt/codec/codegen/yt_codec_cg.cpp b/ydb/library/yql/providers/yt/codec/codegen/yt_codec_cg.cpp index 7547f3580d9..382c1ef6f2d 100644 --- a/ydb/library/yql/providers/yt/codec/codegen/yt_codec_cg.cpp +++ b/ydb/library/yql/providers/yt/codec/codegen/yt_codec_cg.cpp @@ -832,9 +832,154 @@ private: default: YQL_ENSURE(false, "Unknown data type: " << schemeType); } - } else { - ythrow yexception() << "Skip of complex types is not supported"; + return; + } + + if (type->IsStruct()) { + auto structType = static_cast<TStructType*>(type); + const std::vector<size_t>* reorder = nullptr; + if (auto cookie = structType->GetCookie()) { + reorder = ((const std::vector<size_t>*)cookie); + } + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + GenerateSkip(buf, structType->GetMemberType(reorder ? reorder->at(i) : i), nativeYtTypeFlags); + } + return; + } + + if (type->IsTuple()) { + auto tupleType = static_cast<TTupleType*>(type); + for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { + GenerateSkip(buf, tupleType->GetElementType(i), nativeYtTypeFlags); + } + return; + } + + if (type->IsList()) { + auto itemType = static_cast<TListType*>(type)->GetItemType(); + const auto done = BasicBlock::Create(context, "done", Func_); + const auto listEndMarker = ConstantInt::get(Type::getInt8Ty(context), 0xFF); + const auto innerSkip = BasicBlock::Create(context, "innerSkip", Func_); + const auto listContinue = BasicBlock::Create(context, "listContinue", Func_); + BranchInst::Create(listContinue, Block_); + + { + Block_ = innerSkip; + GenerateSkip(buf, itemType, nativeYtTypeFlags); + BranchInst::Create(listContinue, Block_); + } + { + Block_ = listContinue; + const auto marker = CallInst::Create(module.getFunction("ReadOptional"), { buf }, "optMarker", Block_); + const auto check = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, marker, listEndMarker, "exists", Block_); + BranchInst::Create(done, innerSkip, check, Block_); + } + + Block_ = done; + return; + } + + if (type->IsVariant()) { + auto varType = static_cast<TVariantType*>(type); + const auto isOneByte = ConstantInt::get(Type::getInt8Ty(context), varType->GetAlternativesCount() < 256); + const auto data = CallInst::Create(module.getFunction("ReadVariantData"), { buf, isOneByte }, "data", Block_); + + std::function<TType*(size_t)> getType; + std::function<void(size_t, size_t)> genLR = [&] (size_t l, size_t r){ + size_t m = (l + r) >> 1; + if (l == r) { + GenerateSkip(buf, getType(m), nativeYtTypeFlags); + return; + } + auto fn = std::to_string(l) + "_" + std::to_string(r); + const auto currIdx = ConstantInt::get(Type::getInt16Ty(context), m); + const auto isCurrent = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULE, data, currIdx, "isUs" + fn, Block_); + auto lessEq = BasicBlock::Create(context, "le" + fn, Func_); + auto greater = BasicBlock::Create(context, "g" + fn, Func_); + auto out = BasicBlock::Create(context, "o" + fn, Func_); + BranchInst::Create(lessEq, greater, isCurrent, Block_); + { + Block_ = lessEq; + genLR(l, m); + BranchInst::Create(out, Block_); + } + { + Block_ = greater; + genLR(m + 1, r); + BranchInst::Create(out, Block_); + } + Block_ = out; + }; + + size_t elemCount = 0; + if (varType->GetUnderlyingType()->IsTuple()) { + auto tupleType = static_cast<TTupleType*>(varType->GetUnderlyingType()); + elemCount = tupleType->GetElementsCount(); + getType = [tupleType=tupleType] (size_t i) { + return tupleType->GetElementType(i); + }; + } else { + auto structType = static_cast<TStructType*>(varType->GetUnderlyingType()); + + const std::vector<size_t>* reorder = nullptr; + if (auto cookie = structType->GetCookie()) { + reorder = ((const std::vector<size_t>*)cookie); + } + + elemCount = structType->GetMembersCount(); + + getType = [reorder = reorder, structType=structType] (size_t i) { + return structType->GetMemberType(reorder ? reorder->at(i) : i); + }; + } + genLR(0, elemCount - 1); + return; + } + + if (type->IsVoid()) { + return; + } + + if (type->IsNull()) { + return; + } + + if (type->IsEmptyList() || type->IsEmptyDict()) { + return; + } + + if (type->IsDict()) { + auto dictType = static_cast<TDictType*>(type); + auto keyType = dictType->GetKeyType(); + auto payloadType = dictType->GetPayloadType(); + const auto done = BasicBlock::Create(context, "done", Func_); + + const auto innerSkip = BasicBlock::Create(context, "innerSkip", Func_); + + const auto listContinue = BasicBlock::Create(context, "listContinue", Func_); + + const auto listEndMarker = ConstantInt::get(Type::getInt8Ty(context), 0xFF); + + BranchInst::Create(listContinue, Block_); + + { + Block_ = innerSkip; + GenerateSkip(buf, keyType, nativeYtTypeFlags); + GenerateSkip(buf, payloadType, nativeYtTypeFlags); + BranchInst::Create(listContinue, Block_); + } + { + Block_ = listContinue; + const auto marker = CallInst::Create(module.getFunction("ReadOptional"), { buf }, "optMarker", Block_); + const auto check = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, marker, listEndMarker, "exists", Block_); + BranchInst::Create(done, innerSkip, check, Block_); + } + + Block_ = done; + return; } + + YQL_ENSURE(false, "Unsupported type for skip: " << type->GetKindAsStr()); } private: diff --git a/ydb/library/yql/providers/yt/codec/yt_codec_io.cpp b/ydb/library/yql/providers/yt/codec/yt_codec_io.cpp index 9268ecf4a0b..450ade03b6e 100644 --- a/ydb/library/yql/providers/yt/codec/yt_codec_io.cpp +++ b/ydb/library/yql/providers/yt/codec/yt_codec_io.cpp @@ -2,6 +2,7 @@ #include <ydb/library/yql/providers/common/codec/yql_restricted_yson.h> #include <ydb/library/yql/providers/common/codec/yql_codec_type_flags.h> +#include <ydb/library/yql/providers/common/codec/yql_codec.h> #include <ydb/library/yql/providers/yt/common/yql_names.h> #ifndef MKQL_DISABLE_CODEGEN #include <ydb/library/yql/providers/yt/codec/codegen/yt_codec_cg.h> @@ -913,19 +914,28 @@ protected: case TType::EKind::Variant: { auto varType = static_cast<TVariantType*>(type); CHECK_EXPECTED(cmd, BeginListSymbol); - EXPECTED(Buf_, Uint64Marker); - ui64 index = Buf_.ReadVarUI64(); - YQL_ENSURE(index < varType->GetAlternativesCount(), "Bad variant alternative: " << index << ", only " << - varType->GetAlternativesCount() << " are available"); + cmd = Buf_.Read(); + YQL_ENSURE(cmd == Int64Marker || cmd == Uint64Marker, "Excepted [U]Int64 marker, but got: " << int(cmd)); auto underlyingType = varType->GetUnderlyingType(); YQL_ENSURE(underlyingType->IsTuple() || underlyingType->IsStruct(), "Wrong underlying type"); TType* itemType; + i64 index; + + if (cmd == Int64Marker) { + index = Buf_.ReadVarI64(); + } else { + index = Buf_.ReadVarUI64(); + } + + YQL_ENSURE(index > -1 && index < varType->GetAlternativesCount(), "Bad variant alternative: " << index << ", only " << + varType->GetAlternativesCount() << " are available"); if (underlyingType->IsTuple()) { itemType = static_cast<TTupleType*>(underlyingType)->GetElementType(index); } else { itemType = static_cast<TStructType*>(underlyingType)->GetMemberType(index); } + EXPECTED(Buf_, ListItemSeparatorSymbol); cmd = Buf_.Read(); SkipValue(itemType, cmd); @@ -1004,18 +1014,42 @@ protected: case TType::EKind::Struct: { auto structType = static_cast<TStructType*>(type); - CHECK_EXPECTED(cmd, BeginListSymbol); - cmd = Buf_.Read(); + YQL_ENSURE(cmd == BeginMapSymbol || cmd == BeginListSymbol); + if (cmd == BeginListSymbol) { + cmd = Buf_.Read(); + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + SkipValue(structType->GetMemberType(i), cmd); + cmd = Buf_.Read(); + if (cmd == ListItemSeparatorSymbol) { + cmd = Buf_.Read(); + } + } + CHECK_EXPECTED(cmd, EndMapSymbol); + break; + } + + cmd = Buf_.Read(); for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - SkipValue(structType->GetMemberType(i), cmd); + CHECK_EXPECTED(cmd, StringMarker); + const i32 length = Buf_.ReadVarI32(); + CHECK_STRING_LENGTH(length); + TString name(length, '\0'); + Buf_.ReadMany((char*)name.data(), length); + cmd = Buf_.Read(); + CHECK_EXPECTED(cmd, KeyValueSeparatorSymbol); + + auto idx = structType->FindMemberIndex(name); + YQL_ENSURE(idx); + cmd = Buf_.Read(); + SkipValue(structType->GetMemberType(*idx), cmd); cmd = Buf_.Read(); if (cmd == ListItemSeparatorSymbol) { cmd = Buf_.Read(); } } - CHECK_EXPECTED(cmd, EndListSymbol); + CHECK_EXPECTED(cmd, EndMapSymbol); break; } @@ -1331,89 +1365,7 @@ protected: } void SkipSkiffField(TType* type, ui64 nativeYtTypeFlags) { - const bool isOptional = type->IsOptional(); - TType* uwrappedType = type; - if (type->IsOptional()) { - uwrappedType = static_cast<TOptionalType*>(type)->GetItemType(); - } - - if (isOptional) { - auto marker = Buf_.Read(); - if (!marker) { - return; - } - } - - if (uwrappedType->IsData()) { - auto schemeType = static_cast<TDataType*>(uwrappedType)->GetSchemeType(); - switch (schemeType) { - case NUdf::TDataType<bool>::Id: - Buf_.SkipMany(sizeof(ui8)); - break; - - case NUdf::TDataType<ui8>::Id: - case NUdf::TDataType<ui16>::Id: - case NUdf::TDataType<ui32>::Id: - case NUdf::TDataType<ui64>::Id: - case NUdf::TDataType<NUdf::TDate>::Id: - case NUdf::TDataType<NUdf::TDatetime>::Id: - case NUdf::TDataType<NUdf::TTimestamp>::Id: - Buf_.SkipMany(sizeof(ui64)); - break; - - case NUdf::TDataType<i8>::Id: - case NUdf::TDataType<i16>::Id: - case NUdf::TDataType<i32>::Id: - case NUdf::TDataType<i64>::Id: - case NUdf::TDataType<NUdf::TInterval>::Id: - Buf_.SkipMany(sizeof(i64)); - break; - - case NUdf::TDataType<float>::Id: - case NUdf::TDataType<double>::Id: - Buf_.SkipMany(sizeof(double)); - break; - - case NUdf::TDataType<NUdf::TUtf8>::Id: - case NUdf::TDataType<char*>::Id: - case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TYson>::Id: - case NUdf::TDataType<NUdf::TUuid>::Id: - case NUdf::TDataType<NUdf::TDyNumber>::Id: - case NUdf::TDataType<NUdf::TTzDate>::Id: - case NUdf::TDataType<NUdf::TTzDatetime>::Id: - case NUdf::TDataType<NUdf::TTzTimestamp>::Id: - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - ui32 size; - Buf_.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - Buf_.SkipMany(size); - break; - } - case NUdf::TDataType<NUdf::TDecimal>::Id: { - if (nativeYtTypeFlags & NTCF_DECIMAL) { - auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); - if (params.first < 10) { - Buf_.SkipMany(sizeof(i32)); - } else if (params.first < 19) { - Buf_.SkipMany(sizeof(i64)); - } else { - Buf_.SkipMany(sizeof(NDecimal::TInt128)); - } - } else { - ui32 size; - Buf_.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - Buf_.SkipMany(size); - } - break; - } - default: - YQL_ENSURE(false, "Unsupported data type: " << schemeType); - } - } else { - ythrow yexception() << "Skip of complex types is not supported"; - } + return NCommon::SkipSkiffField(type, nativeYtTypeFlags, Buf_); } }; diff --git a/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp b/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp index 93872431c6a..b198d61f18c 100644 --- a/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp +++ b/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp @@ -92,6 +92,12 @@ NUdf::TUnboxedValue ReadYsonValuePg(NKikimr::NMiniKQL::TPgType* type, char cmd, throw yexception() << "ReadYsonValuePg: PG types are not supported"; } +void SkipSkiffPg(NKikimr::NMiniKQL::TPgType* type, NCommon::TInputBuf& buf) { + Y_UNUSED(type); + Y_UNUSED(buf); + throw yexception() << "SkipSkiffPg: PG types are not supported"; +} + NKikimr::NUdf::TUnboxedValue ReadSkiffPg(NKikimr::NMiniKQL::TPgType* type, NCommon::TInputBuf& buf) { Y_UNUSED(type); Y_UNUSED(buf); |