summaryrefslogtreecommitdiffstats
path: root/yql/essentials/minikql/jsonpath/parser/binary.cpp
diff options
context:
space:
mode:
authorvvvv <[email protected]>2024-12-18 21:57:17 +0300
committervvvv <[email protected]>2024-12-18 22:58:34 +0300
commitf7f8a1dcd4b4b4a1af7acc6d8127d285c3850795 (patch)
treec22ca71d9eb197030fbd946452f4aadf613df95c /yql/essentials/minikql/jsonpath/parser/binary.cpp
parent9690d79e797ada85de4a648fbc1e386ea4621efe (diff)
Extracted parser from jsonpath lib
init commit_hash:8c6dd46ee72034f0480757612dcceb524d19a1d1
Diffstat (limited to 'yql/essentials/minikql/jsonpath/parser/binary.cpp')
-rw-r--r--yql/essentials/minikql/jsonpath/parser/binary.cpp604
1 files changed, 604 insertions, 0 deletions
diff --git a/yql/essentials/minikql/jsonpath/parser/binary.cpp b/yql/essentials/minikql/jsonpath/parser/binary.cpp
new file mode 100644
index 00000000000..8d75a6d3b90
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/parser/binary.cpp
@@ -0,0 +1,604 @@
+#include "binary.h"
+
+#include <yql/essentials/utils/yql_panic.h>
+
+namespace NYql::NJsonPath {
+
+bool TArraySubscriptOffsets::IsRange() const {
+ return ToOffset > 0;
+}
+
+const TStringBuf TJsonPathItem::GetString() const {
+ return std::get<TStringBuf>(Data);
+}
+
+const TVector<TArraySubscriptOffsets>& TJsonPathItem::GetSubscripts() const {
+ return std::get<TVector<TArraySubscriptOffsets>>(Data);
+}
+
+const TBinaryOpArgumentsOffset& TJsonPathItem::GetBinaryOpArguments() const {
+ return std::get<TBinaryOpArgumentsOffset>(Data);
+}
+
+double TJsonPathItem::GetNumber() const {
+ return std::get<double>(Data);
+}
+
+bool TJsonPathItem::GetBoolean() const {
+ return std::get<bool>(Data);
+}
+
+TFilterPredicateOffset TJsonPathItem::GetFilterPredicateOffset() const {
+ return std::get<TFilterPredicateOffset>(Data);
+}
+
+TStartsWithPrefixOffset TJsonPathItem::GetStartsWithPrefixOffset() const {
+ return std::get<TStartsWithPrefixOffset>(Data);
+}
+
+const NReWrapper::IRePtr& TJsonPathItem::GetRegex() const {
+ return std::get<NReWrapper::IRePtr>(Data);
+}
+
+TJsonPathReader::TJsonPathReader(const TJsonPathPtr path)
+ : Path(path)
+ , InitialPos(0)
+ , Mode(ReadMode(InitialPos))
+{
+}
+
+const TJsonPathItem& TJsonPathReader::ReadFirst() {
+ return ReadFromPos(InitialPos);
+}
+
+const TJsonPathItem& TJsonPathReader::ReadInput(const TJsonPathItem& item) {
+ YQL_ENSURE(item.InputItemOffset.Defined());
+ return ReadFromPos(*item.InputItemOffset);
+}
+
+const TJsonPathItem& TJsonPathReader::ReadFromSubscript(const TArraySubscriptOffsets& subscript) {
+ return ReadFromPos(subscript.FromOffset);
+}
+
+const TJsonPathItem& TJsonPathReader::ReadToSubscript(const TArraySubscriptOffsets& subscript) {
+ YQL_ENSURE(subscript.IsRange());
+ return ReadFromPos(subscript.ToOffset);
+}
+
+const TJsonPathItem& TJsonPathReader::ReadLeftOperand(const TJsonPathItem& node) {
+ return ReadFromPos(node.GetBinaryOpArguments().LeftOffset);
+}
+
+const TJsonPathItem& TJsonPathReader::ReadRightOperand(const TJsonPathItem& node) {
+ return ReadFromPos(node.GetBinaryOpArguments().RightOffset);
+}
+
+const TJsonPathItem& TJsonPathReader::ReadFilterPredicate(const TJsonPathItem& node) {
+ return ReadFromPos(node.GetFilterPredicateOffset().Offset);
+}
+
+const TJsonPathItem& TJsonPathReader::ReadPrefix(const TJsonPathItem& node) {
+ return ReadFromPos(node.GetStartsWithPrefixOffset().Offset);
+}
+
+EJsonPathMode TJsonPathReader::GetMode() const {
+ return Mode;
+}
+
+const TJsonPathItem& TJsonPathReader::ReadFromPos(TUint pos) {
+ YQL_ENSURE(pos < Path->Size());
+
+ const auto it = ItemCache.find(pos);
+ if (it != ItemCache.end()) {
+ return it->second;
+ }
+
+ TJsonPathItem& result = ItemCache[pos];
+ result.Type = ReadType(pos);
+
+ const auto row = ReadUint(pos);
+ const auto column = ReadUint(pos);
+ result.Pos = TPosition(column, row, "jsonpath");
+
+ switch (result.Type) {
+ // Items without input
+ case EJsonPathItemType::FilterObject:
+ case EJsonPathItemType::NullLiteral:
+ case EJsonPathItemType::ContextObject:
+ case EJsonPathItemType::LastArrayIndex:
+ break;
+
+ case EJsonPathItemType::Variable:
+ case EJsonPathItemType::StringLiteral:
+ result.Data = ReadString(pos);
+ break;
+
+ case EJsonPathItemType::NumberLiteral:
+ result.Data = ReadDouble(pos);
+ break;
+
+ case EJsonPathItemType::BooleanLiteral:
+ result.Data = ReadBool(pos);
+ break;
+
+ // Items with single input
+ case EJsonPathItemType::TypeMethod:
+ case EJsonPathItemType::SizeMethod:
+ case EJsonPathItemType::KeyValueMethod:
+ case EJsonPathItemType::AbsMethod:
+ case EJsonPathItemType::FloorMethod:
+ case EJsonPathItemType::CeilingMethod:
+ case EJsonPathItemType::DoubleMethod:
+ case EJsonPathItemType::WildcardArrayAccess:
+ case EJsonPathItemType::WildcardMemberAccess:
+ case EJsonPathItemType::UnaryMinus:
+ case EJsonPathItemType::UnaryPlus:
+ case EJsonPathItemType::UnaryNot:
+ case EJsonPathItemType::IsUnknownPredicate:
+ case EJsonPathItemType::ExistsPredicate:
+ result.InputItemOffset = ReadUint(pos);
+ break;
+
+ case EJsonPathItemType::MemberAccess:
+ result.Data = ReadString(pos);
+ result.InputItemOffset = ReadUint(pos);
+ break;
+
+ case EJsonPathItemType::ArrayAccess:
+ result.Data = ReadSubscripts(pos);
+ result.InputItemOffset = ReadUint(pos);
+ break;
+
+ case EJsonPathItemType::FilterPredicate:
+ result.Data = TFilterPredicateOffset{ReadUint(pos)};
+ result.InputItemOffset = ReadUint(pos);
+ break;
+
+ case EJsonPathItemType::StartsWithPredicate:
+ result.Data = TStartsWithPrefixOffset{ReadUint(pos)};
+ result.InputItemOffset = ReadUint(pos);
+ break;
+
+ case EJsonPathItemType::LikeRegexPredicate: {
+ const auto serializedRegex = ReadString(pos);
+
+ auto regex = NReWrapper::NDispatcher::Deserialize(serializedRegex);
+ result.Data = std::move(regex);
+ result.InputItemOffset = ReadUint(pos);
+ break;
+ }
+
+ // Items with 2 inputs
+ case EJsonPathItemType::BinaryAdd:
+ case EJsonPathItemType::BinarySubstract:
+ case EJsonPathItemType::BinaryMultiply:
+ case EJsonPathItemType::BinaryDivide:
+ case EJsonPathItemType::BinaryModulo:
+ case EJsonPathItemType::BinaryLess:
+ case EJsonPathItemType::BinaryLessEqual:
+ case EJsonPathItemType::BinaryGreater:
+ case EJsonPathItemType::BinaryGreaterEqual:
+ case EJsonPathItemType::BinaryEqual:
+ case EJsonPathItemType::BinaryNotEqual:
+ case EJsonPathItemType::BinaryAnd:
+ case EJsonPathItemType::BinaryOr:
+ TBinaryOpArgumentsOffset data;
+ data.LeftOffset = ReadUint(pos);
+ data.RightOffset = ReadUint(pos);
+ result.Data = data;
+ break;
+ }
+
+ return result;
+}
+
+TUint TJsonPathReader::ReadUint(TUint& pos) {
+ return ReadPOD<TUint>(pos);
+}
+
+double TJsonPathReader::ReadDouble(TUint& pos) {
+ return ReadPOD<double>(pos);
+}
+
+bool TJsonPathReader::ReadBool(TUint& pos) {
+ return ReadPOD<bool>(pos);
+}
+
+EJsonPathItemType TJsonPathReader::ReadType(TUint& pos) {
+ return static_cast<EJsonPathItemType>(ReadUint(pos));
+}
+
+EJsonPathMode TJsonPathReader::ReadMode(TUint& pos) {
+ return static_cast<EJsonPathMode>(ReadUint(pos));
+}
+
+const TStringBuf TJsonPathReader::ReadString(TUint& pos) {
+ TUint length = ReadUint(pos);
+ TStringBuf result(Path->Begin() + pos, length);
+ pos += length;
+ return result;
+}
+
+TVector<TArraySubscriptOffsets> TJsonPathReader::ReadSubscripts(TUint& pos) {
+ const auto count = ReadUint(pos);
+ TVector<TArraySubscriptOffsets> result(count);
+
+ for (size_t i = 0; i < count; i++) {
+ result[i].FromOffset = ReadUint(pos);
+ result[i].ToOffset = ReadUint(pos);
+ }
+ return result;
+}
+
+void TJsonPathBuilder::VisitRoot(const TRootNode& node) {
+ // Block structure:
+ // <(1) TUint>
+ // Components:
+ // (1) Must be casted to EJsonPathMode. Jsonpath execution mode
+ WriteMode(node.GetMode());
+ node.GetExpr()->Accept(*this);
+}
+
+void TJsonPathBuilder::VisitContextObject(const TContextObjectNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::ContextObject, node);
+}
+
+void TJsonPathBuilder::VisitVariable(const TVariableNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::Variable, node);
+ WriteString(node.GetName());
+}
+
+void TJsonPathBuilder::VisitLastArrayIndex(const TLastArrayIndexNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::LastArrayIndex, node);
+}
+
+void TJsonPathBuilder::VisitNumberLiteral(const TNumberLiteralNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::NumberLiteral, node);
+ WriteDouble(node.GetValue());
+}
+
+void TJsonPathBuilder::VisitMemberAccess(const TMemberAccessNode& node) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint>
+ // Components:
+ // (1) Must be casted to EJsonPathItemType. Member access item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ // (4) Length of member name string
+ // (5) Member name string
+ // (6) Offset of the input item
+ WriteType(EJsonPathItemType::MemberAccess);
+ WritePos(node);
+ WriteString(node.GetMember());
+
+ WriteNextPosition();
+ node.GetInput()->Accept(*this);
+}
+
+void TJsonPathBuilder::VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) {
+ WriteSingleInputItem(EJsonPathItemType::WildcardMemberAccess, node, node.GetInput());
+}
+
+void TJsonPathBuilder::VisitArrayAccess(const TArrayAccessNode& node) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) pair<TUint, TUint>[]> <(6) TUint> <(7) items>
+ // Components:
+ // (1) Must be casted to EJsonPathItemType. Array access item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ // (4) Count of subscripts stored
+ // (5) Array of pairs with offsets to subscript items. If subscript is a single index, only first element
+ // is set to it's offset and second is zero. If subscript is a range, both pair elements are valid offsets
+ // to the elements of range (lower and upper bound).
+ // (6) Offset of the input item
+ // (7) Array of subcsripts. For details about encoding see VisitArraySubscript
+ WriteType(EJsonPathItemType::ArrayAccess);
+ WritePos(node);
+
+ // (4) Write count of subscripts stored
+ const auto& subscripts = node.GetSubscripts();
+ const auto count = subscripts.size();
+ WriteUint(count);
+
+ // (5) We do not know sizes of each subscript. Write array of zeros for offsets
+ const auto indexStart = CurrentEndPos();
+ TVector<TUint> offsets(2 * count);
+ WriteUintSequence(offsets);
+
+ // (6) Reserve space for input offset to rewrite it later
+ const auto inputStart = CurrentEndPos();
+ WriteFinishPosition();
+
+ // (7) Write all subscripts and record offset for each of them
+ for (size_t i = 0; i < count; i++) {
+ offsets[2 * i] = CurrentEndPos();
+ subscripts[i].From->Accept(*this);
+
+ if (subscripts[i].To) {
+ offsets[2 * i + 1] = CurrentEndPos();
+ subscripts[i].To->Accept(*this);
+ }
+ }
+
+ // (5) Rewrite offsets with correct values
+ RewriteUintSequence(offsets, indexStart);
+
+ // (6) Rewrite input offset
+ RewriteUint(CurrentEndPos(), inputStart);
+ node.GetInput()->Accept(*this);
+}
+
+void TJsonPathBuilder::VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) {
+ WriteSingleInputItem(EJsonPathItemType::WildcardArrayAccess, node, node.GetInput());
+}
+
+void TJsonPathBuilder::VisitUnaryOperation(const TUnaryOperationNode& node) {
+ EJsonPathItemType type;
+ switch (node.GetOp()) {
+ case EUnaryOperation::Plus:
+ type = EJsonPathItemType::UnaryPlus;
+ break;
+ case EUnaryOperation::Minus:
+ type = EJsonPathItemType::UnaryMinus;
+ break;
+ case EUnaryOperation::Not:
+ type = EJsonPathItemType::UnaryNot;
+ break;
+ }
+
+ WriteSingleInputItem(type, node, node.GetExpr());
+}
+
+void TJsonPathBuilder::VisitBinaryOperation(const TBinaryOperationNode& node) {
+ EJsonPathItemType type;
+ switch (node.GetOp()) {
+ case EBinaryOperation::Add:
+ type = EJsonPathItemType::BinaryAdd;
+ break;
+ case EBinaryOperation::Substract:
+ type = EJsonPathItemType::BinarySubstract;
+ break;
+ case EBinaryOperation::Multiply:
+ type = EJsonPathItemType::BinaryMultiply;
+ break;
+ case EBinaryOperation::Divide:
+ type = EJsonPathItemType::BinaryDivide;
+ break;
+ case EBinaryOperation::Modulo:
+ type = EJsonPathItemType::BinaryModulo;
+ break;
+ case EBinaryOperation::Less:
+ type = EJsonPathItemType::BinaryLess;
+ break;
+ case EBinaryOperation::LessEqual:
+ type = EJsonPathItemType::BinaryLessEqual;
+ break;
+ case EBinaryOperation::Greater:
+ type = EJsonPathItemType::BinaryGreater;
+ break;
+ case EBinaryOperation::GreaterEqual:
+ type = EJsonPathItemType::BinaryGreaterEqual;
+ break;
+ case EBinaryOperation::Equal:
+ type = EJsonPathItemType::BinaryEqual;
+ break;
+ case EBinaryOperation::NotEqual:
+ type = EJsonPathItemType::BinaryNotEqual;
+ break;
+ case EBinaryOperation::And:
+ type = EJsonPathItemType::BinaryAnd;
+ break;
+ case EBinaryOperation::Or:
+ type = EJsonPathItemType::BinaryOr;
+ break;
+ }
+
+ WriteTwoInputsItem(type, node, node.GetLeftExpr(), node.GetRightExpr());
+}
+
+void TJsonPathBuilder::VisitBooleanLiteral(const TBooleanLiteralNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::BooleanLiteral, node);
+ WriteBool(node.GetValue());
+}
+
+void TJsonPathBuilder::VisitNullLiteral(const TNullLiteralNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::NullLiteral, node);
+}
+
+void TJsonPathBuilder::VisitStringLiteral(const TStringLiteralNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::StringLiteral, node);
+ WriteString(node.GetValue());
+}
+
+void TJsonPathBuilder::VisitFilterObject(const TFilterObjectNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::FilterObject, node);
+}
+
+void TJsonPathBuilder::VisitFilterPredicate(const TFilterPredicateNode& node) {
+ WriteTwoInputsItem(EJsonPathItemType::FilterPredicate, node, node.GetPredicate(), node.GetInput());
+}
+
+void TJsonPathBuilder::VisitMethodCall(const TMethodCallNode& node) {
+ EJsonPathItemType type;
+ switch (node.GetType()) {
+ case EMethodType::Abs:
+ type = EJsonPathItemType::AbsMethod;
+ break;
+ case EMethodType::Floor:
+ type = EJsonPathItemType::FloorMethod;
+ break;
+ case EMethodType::Ceiling:
+ type = EJsonPathItemType::CeilingMethod;
+ break;
+ case EMethodType::Double:
+ type = EJsonPathItemType::DoubleMethod;
+ break;
+ case EMethodType::Type:
+ type = EJsonPathItemType::TypeMethod;
+ break;
+ case EMethodType::Size:
+ type = EJsonPathItemType::SizeMethod;
+ break;
+ case EMethodType::KeyValue:
+ type = EJsonPathItemType::KeyValueMethod;
+ break;
+ }
+
+ WriteSingleInputItem(type, node, node.GetInput());
+}
+
+TJsonPathPtr TJsonPathBuilder::ShrinkAndGetResult() {
+ Result->ShrinkToFit();
+ return Result;
+}
+
+void TJsonPathBuilder::VisitStartsWithPredicate(const TStartsWithPredicateNode& node) {
+ WriteTwoInputsItem(EJsonPathItemType::StartsWithPredicate, node, node.GetPrefix(), node.GetInput());
+}
+
+void TJsonPathBuilder::VisitExistsPredicate(const TExistsPredicateNode& node) {
+ WriteSingleInputItem(EJsonPathItemType::ExistsPredicate, node, node.GetInput());
+}
+
+void TJsonPathBuilder::VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) {
+ WriteSingleInputItem(EJsonPathItemType::IsUnknownPredicate, node, node.GetInput());
+}
+
+void TJsonPathBuilder::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint>
+ // Components:
+ // (1) Must be casted to EJsonPathItemType. Member access item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ // (4) Length of serialized Hyperscan database
+ // (5) Serialized Hyperscan database
+ // (6) Offset of the input item
+ WriteType(EJsonPathItemType::LikeRegexPredicate);
+ WritePos(node);
+
+ const TString serializedRegex = node.GetRegex()->Serialize();
+ WriteString(serializedRegex);
+
+ WriteNextPosition();
+ node.GetInput()->Accept(*this);
+}
+
+void TJsonPathBuilder::WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint>
+ // Components:
+ // (1) Item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ WriteType(type);
+ WritePos(node);
+}
+
+void TJsonPathBuilder::WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) item>
+ // Components:
+ // (1) Item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ // (4) Offset of the input item
+ // (5) Input item
+ WriteZeroInputItem(type, node);
+
+ WriteNextPosition();
+ input->Accept(*this);
+}
+
+void TJsonPathBuilder::WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const TAstNodePtr secondInput) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) TUint> <(6) item> <(7) item>
+ // Components:
+ // (1) Item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ // (4) Offset of the first input
+ // (5) Offset of the second input
+ // (6) JsonPath item representing first input
+ // (7) JsonPath item representing right input
+ WriteZeroInputItem(type, node);
+
+ // (4) and (5) Fill offsets with zeros
+ const auto indexStart = CurrentEndPos();
+ WriteUint(0);
+ WriteUint(0);
+
+ // (6) Write first input and record it's offset
+ const auto firstInputStart = CurrentEndPos();
+ firstInput->Accept(*this);
+
+ // (7) Write second input and record it's offset
+ const auto secondInputStart = CurrentEndPos();
+ secondInput->Accept(*this);
+
+ // (4) and (5) Rewrite offsets with correct values
+ RewriteUintSequence({firstInputStart, secondInputStart}, indexStart);
+}
+
+void TJsonPathBuilder::WritePos(const TAstNode& node) {
+ WriteUint(node.GetPos().Row);
+ WriteUint(node.GetPos().Column);
+}
+
+void TJsonPathBuilder::WriteType(EJsonPathItemType type) {
+ WriteUint(static_cast<TUint>(type));
+}
+
+void TJsonPathBuilder::WriteMode(EJsonPathMode mode) {
+ WriteUint(static_cast<TUint>(mode));
+}
+
+void TJsonPathBuilder::WriteNextPosition() {
+ WriteUint(CurrentEndPos() + sizeof(TUint));
+}
+
+void TJsonPathBuilder::WriteFinishPosition() {
+ WriteUint(0);
+}
+
+void TJsonPathBuilder::WriteString(TStringBuf value) {
+ WriteUint(value.size());
+ Result->Append(value.data(), value.size());
+}
+
+void TJsonPathBuilder::RewriteUintSequence(const TVector<TUint>& sequence, TUint offset) {
+ const auto length = sequence.size() * sizeof(TUint);
+ Y_ASSERT(offset + length < CurrentEndPos());
+
+ MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(sequence.data()), length);
+}
+
+void TJsonPathBuilder::WriteUintSequence(const TVector<TUint>& sequence) {
+ const auto length = sequence.size() * sizeof(TUint);
+ Result->Append(reinterpret_cast<const char*>(sequence.data()), length);
+}
+
+void TJsonPathBuilder::RewriteUint(TUint value, TUint offset) {
+ Y_ASSERT(offset + sizeof(TUint) < CurrentEndPos());
+
+ MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(&value), sizeof(TUint));
+}
+
+void TJsonPathBuilder::WriteUint(TUint value) {
+ WritePOD(value);
+}
+
+void TJsonPathBuilder::WriteDouble(double value) {
+ WritePOD(value);
+}
+
+void TJsonPathBuilder::WriteBool(bool value) {
+ WritePOD(value);
+}
+
+TUint TJsonPathBuilder::CurrentEndPos() const {
+ return Result->Size();
+}
+
+
+}