aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordcherednik <dcherednik@ydb.tech>2022-11-22 11:36:15 +0300
committerdcherednik <dcherednik@ydb.tech>2022-11-22 11:36:15 +0300
commit76e1bee3584dea3717fb8c390c24298c0f3ad543 (patch)
treecf4ad09af047599c173a48db491af97793b0b870
parentaff864d15441e306a8af32306d5beac8051c01fb (diff)
downloadydb-76e1bee3584dea3717fb8c390c24298c0f3ad543.tar.gz
RE2 support for json path (ARM build).
-rw-r--r--ydb/library/CMakeLists.txt1
-rw-r--r--ydb/library/rewrapper/CMakeLists.darwin.txt21
-rw-r--r--ydb/library/rewrapper/CMakeLists.linux-aarch64.txt20
-rw-r--r--ydb/library/rewrapper/CMakeLists.linux.txt21
-rw-r--r--ydb/library/rewrapper/CMakeLists.txt15
-rw-r--r--ydb/library/rewrapper/dispatcher.cpp67
-rw-r--r--ydb/library/rewrapper/hyperscan/CMakeLists.darwin.txt27
-rw-r--r--ydb/library/rewrapper/hyperscan/CMakeLists.linux.txt27
-rw-r--r--ydb/library/rewrapper/hyperscan/CMakeLists.txt13
-rw-r--r--ydb/library/rewrapper/hyperscan/hyperscan.cpp69
-rw-r--r--ydb/library/rewrapper/proto/CMakeLists.txt31
-rw-r--r--ydb/library/rewrapper/proto/serialization.proto15
-rw-r--r--ydb/library/rewrapper/re.h31
-rw-r--r--ydb/library/rewrapper/re2/CMakeLists.txt27
-rw-r--r--ydb/library/rewrapper/re2/re2.cpp89
-rw-r--r--ydb/library/rewrapper/registrator.h28
-rw-r--r--ydb/library/rewrapper/ut/hyperscan_ut.cpp37
-rw-r--r--ydb/library/rewrapper/ut/re2_ut.cpp23
-rw-r--r--ydb/library/yql/minikql/jsonpath/CMakeLists.darwin.txt50
-rw-r--r--ydb/library/yql/minikql/jsonpath/CMakeLists.linux-aarch64.txt49
-rw-r--r--ydb/library/yql/minikql/jsonpath/CMakeLists.linux.txt50
-rw-r--r--ydb/library/yql/minikql/jsonpath/CMakeLists.txt46
-rw-r--r--ydb/library/yql/minikql/jsonpath/ast_builder.cpp29
-rw-r--r--ydb/library/yql/minikql/jsonpath/ast_nodes.cpp6
-rw-r--r--ydb/library/yql/minikql/jsonpath/ast_nodes.h8
-rw-r--r--ydb/library/yql/minikql/jsonpath/benchmark/main.cpp44
-rw-r--r--ydb/library/yql/minikql/jsonpath/binary.cpp11
-rw-r--r--ydb/library/yql/minikql/jsonpath/binary.h13
-rw-r--r--ydb/library/yql/minikql/jsonpath/executor.cpp5
-rw-r--r--ydb/library/yql/minikql/jsonpath/ut/lib_id_ut.cpp23
30 files changed, 812 insertions, 84 deletions
diff --git a/ydb/library/CMakeLists.txt b/ydb/library/CMakeLists.txt
index 5147904f2a2..0c98beb11f1 100644
--- a/ydb/library/CMakeLists.txt
+++ b/ydb/library/CMakeLists.txt
@@ -24,6 +24,7 @@ add_subdirectory(pdisk_io)
add_subdirectory(persqueue)
add_subdirectory(pretty_types_print)
add_subdirectory(protobuf_printer)
+add_subdirectory(rewrapper)
add_subdirectory(schlab)
add_subdirectory(security)
add_subdirectory(testlib)
diff --git a/ydb/library/rewrapper/CMakeLists.darwin.txt b/ydb/library/rewrapper/CMakeLists.darwin.txt
new file mode 100644
index 00000000000..f4cd86ba568
--- /dev/null
+++ b/ydb/library/rewrapper/CMakeLists.darwin.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(hyperscan)
+add_subdirectory(proto)
+add_subdirectory(re2)
+
+add_library(ydb-library-rewrapper)
+target_link_libraries(ydb-library-rewrapper PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-rewrapper-proto
+)
+target_sources(ydb-library-rewrapper PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/rewrapper/dispatcher.cpp
+)
diff --git a/ydb/library/rewrapper/CMakeLists.linux-aarch64.txt b/ydb/library/rewrapper/CMakeLists.linux-aarch64.txt
new file mode 100644
index 00000000000..b31bfe4bd77
--- /dev/null
+++ b/ydb/library/rewrapper/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(proto)
+add_subdirectory(re2)
+
+add_library(ydb-library-rewrapper)
+target_link_libraries(ydb-library-rewrapper PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-rewrapper-proto
+)
+target_sources(ydb-library-rewrapper PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/rewrapper/dispatcher.cpp
+)
diff --git a/ydb/library/rewrapper/CMakeLists.linux.txt b/ydb/library/rewrapper/CMakeLists.linux.txt
new file mode 100644
index 00000000000..f4cd86ba568
--- /dev/null
+++ b/ydb/library/rewrapper/CMakeLists.linux.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(hyperscan)
+add_subdirectory(proto)
+add_subdirectory(re2)
+
+add_library(ydb-library-rewrapper)
+target_link_libraries(ydb-library-rewrapper PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-rewrapper-proto
+)
+target_sources(ydb-library-rewrapper PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/rewrapper/dispatcher.cpp
+)
diff --git a/ydb/library/rewrapper/CMakeLists.txt b/ydb/library/rewrapper/CMakeLists.txt
new file mode 100644
index 00000000000..3e0811fb22e
--- /dev/null
+++ b/ydb/library/rewrapper/CMakeLists.txt
@@ -0,0 +1,15 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (APPLE)
+ include(CMakeLists.darwin.txt)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux.txt)
+endif()
diff --git a/ydb/library/rewrapper/dispatcher.cpp b/ydb/library/rewrapper/dispatcher.cpp
new file mode 100644
index 00000000000..b28e52f228d
--- /dev/null
+++ b/ydb/library/rewrapper/dispatcher.cpp
@@ -0,0 +1,67 @@
+#include "registrator.h"
+#include "re.h"
+
+#include <util/generic/fwd.h>
+#include <util/generic/vector.h>
+#include <util/generic/singleton.h>
+#include <util/generic/yexception.h>
+
+#include <ydb/library/rewrapper/proto/serialization.pb.h>
+
+namespace NReWrapper {
+
+namespace NRegistrator {
+
+// One registered regex backend: the id it was registered under plus the two
+// entry points the dispatcher calls for it.
+struct TLib {
+ ui64 Id; // id passed to AddLibrary for this backend
+ TCompiler Compiler; // builds an IRe from a pattern and a flags bitmask
+ TDeserializer Deserializer; // rebuilds an IRe from a TSerialization message
+};
+
+// Backend table; AddLibrary stores the backend with id N at index N - 1.
+using TModules = TVector<TLib>;
+
+// Returns the TModules instance obtained from Singleton<TModules>().
+TModules* GetModules() {
+ return Singleton<TModules>();
+}
+
+// Registers a backend under the 1-based `id`, growing the table when the
+// slot does not exist yet. Y_VERIFY rejects id == 0. Registering the same id
+// again overwrites the earlier entry.
+void AddLibrary(ui32 id, TCompiler compiler, TDeserializer deserializer) {
+    Y_VERIFY(id > 0);
+    TModules& registry = *GetModules();
+    const size_t slot = id - 1;
+    if (slot >= registry.size()) {
+        registry.resize(slot + 1);
+    }
+    registry.at(slot) = TLib{id, compiler, deserializer};
+}
+
+}
+
+namespace NDispatcher {
+
+// Validates that `id` names a backend that was actually registered.
+//
+// AddLibrary() grows the table with resize(), so slots below the highest
+// registered id can be value-initialized placeholders whose Compiler and
+// Deserializer pointers are null (for example when only the backend with
+// id 2 is linked in). Such placeholders are rejected here as well, so the
+// callers below never invoke a null function pointer.
+//
+// Throws yexception when `id` is zero, exceeds the table size, or refers to
+// a slot no backend registered itself into.
+void ThrowOnOutOfRange(ui32 id) {
+    const auto& modules = *NRegistrator::GetModules();
+    // A registered slot always carries its own id (see AddLibrary).
+    const bool registered = id != 0 && id <= modules.size() && modules[id - 1].Id == id;
+    if (!registered) {
+        ythrow yexception()
+            << "Libs with id: " << id
+            << " was not found. Total added libs: " << modules.size();
+    }
+}
+
+// Rebuilds a compiled regex from bytes produced by IRe::Serialize().
+//
+// The input is first parsed as a TSerialization message. When parsing fails
+// the bytes are taken to be a bare Hyperscan database (the format
+// THyperscan::Serialize() emits) and are stored in the Hyperscan field.
+// The oneof case that ends up set is then used as the backend id.
+//
+// Throws yexception (from ThrowOnOutOfRange) when that id is rejected.
+IRePtr Deserialize(const TStringBuf& serializedRegex) {
+    const TString payload(serializedRegex);
+
+    TSerialization message;
+    if (!message.ParseFromString(payload)) {
+        message.SetHyperscan(payload);
+    }
+
+    const ui64 backendId = static_cast<ui64>(message.GetDataCase());
+    ThrowOnOutOfRange(backendId);
+    const auto& backend = NRegistrator::GetModules()->at(backendId - 1);
+    return backend.Deserializer(message);
+}
+
+// Compiles `regex` with the backend registered under `id`.
+// `flags` is handed to the backend unchanged.
+// Throws yexception (from ThrowOnOutOfRange) when `id` is rejected.
+IRePtr Compile(const TStringBuf& regex, unsigned int flags, ui32 id) {
+    ThrowOnOutOfRange(id);
+    const auto& backend = NRegistrator::GetModules()->at(id - 1);
+    return backend.Compiler(regex, flags);
+}
+
+}
+
+}
diff --git a/ydb/library/rewrapper/hyperscan/CMakeLists.darwin.txt b/ydb/library/rewrapper/hyperscan/CMakeLists.darwin.txt
new file mode 100644
index 00000000000..a586c01275c
--- /dev/null
+++ b/ydb/library/rewrapper/hyperscan/CMakeLists.darwin.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-rewrapper-hyperscan INTERFACE)
+target_link_libraries(library-rewrapper-hyperscan INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ cpp-regex-hyperscan
+ ydb-library-rewrapper
+)
+
+add_global_library_for(library-rewrapper-hyperscan.global library-rewrapper-hyperscan)
+target_link_libraries(library-rewrapper-hyperscan.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-regex-hyperscan
+ ydb-library-rewrapper
+)
+target_sources(library-rewrapper-hyperscan.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/rewrapper/hyperscan/hyperscan.cpp
+)
diff --git a/ydb/library/rewrapper/hyperscan/CMakeLists.linux.txt b/ydb/library/rewrapper/hyperscan/CMakeLists.linux.txt
new file mode 100644
index 00000000000..a586c01275c
--- /dev/null
+++ b/ydb/library/rewrapper/hyperscan/CMakeLists.linux.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-rewrapper-hyperscan INTERFACE)
+target_link_libraries(library-rewrapper-hyperscan INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ cpp-regex-hyperscan
+ ydb-library-rewrapper
+)
+
+add_global_library_for(library-rewrapper-hyperscan.global library-rewrapper-hyperscan)
+target_link_libraries(library-rewrapper-hyperscan.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-regex-hyperscan
+ ydb-library-rewrapper
+)
+target_sources(library-rewrapper-hyperscan.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/rewrapper/hyperscan/hyperscan.cpp
+)
diff --git a/ydb/library/rewrapper/hyperscan/CMakeLists.txt b/ydb/library/rewrapper/hyperscan/CMakeLists.txt
new file mode 100644
index 00000000000..79468a5d8d0
--- /dev/null
+++ b/ydb/library/rewrapper/hyperscan/CMakeLists.txt
@@ -0,0 +1,13 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (APPLE)
+ include(CMakeLists.darwin.txt)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux.txt)
+endif()
diff --git a/ydb/library/rewrapper/hyperscan/hyperscan.cpp b/ydb/library/rewrapper/hyperscan/hyperscan.cpp
new file mode 100644
index 00000000000..70f788dd9b9
--- /dev/null
+++ b/ydb/library/rewrapper/hyperscan/hyperscan.cpp
@@ -0,0 +1,69 @@
+#include <ydb/library/rewrapper/re.h>
+#include <ydb/library/rewrapper/registrator.h>
+#include <ydb/library/rewrapper/proto/serialization.pb.h>
+#include <library/cpp/regex/hyperscan/hyperscan.h>
+#include <util/charset/utf8.h>
+
+namespace NReWrapper {
+namespace NHyperscan {
+
+namespace {
+
+// IRe implementation backed by a compiled Hyperscan database.
+class THyperscan : public IRe {
+public:
+    // Takes ownership of an already compiled database. `explicit` prevents a
+    // TDatabase from converting to THyperscan implicitly.
+    explicit THyperscan(::NHyperscan::TDatabase&& db)
+        : Database(std::move(db))
+    { }
+
+    // Returns the result of ::NHyperscan::Matches() for `text`.
+    // The scratch object is created on the first call and reused afterwards.
+    // NOTE(review): Scratch is filled in lazily from a const method without
+    // any synchronization - confirm a single instance is never matched from
+    // several threads at once.
+    bool Matches(const TStringBuf& text) const override {
+        if (!Scratch) {
+            Scratch = ::NHyperscan::MakeScratch(Database);
+        }
+        return ::NHyperscan::Matches(Database, Scratch, text);
+    }
+
+    // Returns the bare Hyperscan blob instead of a TSerialization message.
+    // NDispatcher::Deserialize() treats bytes that do not parse as
+    // TSerialization as such a blob. The protobuf-based variant is kept
+    // below, commented out.
+    TString Serialize() const override {
+        // Compatibility with old versions
+        return ::NHyperscan::Serialize(Database);
+/*
+ * TSerialization proto;
+ * proto.SetHyperscan(::NHyperscan::Serialize(Database));
+ * TString data;
+ * auto res = proto.SerializeToString(&data);
+ * Y_VERIFY(res);
+ * return data;
+ */
+    }
+private:
+    ::NHyperscan::TDatabase Database;
+    mutable ::NHyperscan::TScratch Scratch;
+};
+
+}
+
+// Compiles `regex` into a Hyperscan-backed IRe.
+// `flags` is a bitmask of EFlags; FLAGS_CASELESS maps to HS_FLAG_CASELESS.
+// A ::NHyperscan::TCompileException is rethrown as
+// NReWrapper::TCompileException carrying the same what() text.
+IRePtr Compile(const TStringBuf& regex, unsigned int flags) {
+    try {
+        // Any non-zero UTF8Detect() result switches HS_FLAG_UTF8 on.
+        unsigned int nativeFlags = UTF8Detect(regex) ? HS_FLAG_UTF8 : 0;
+        // NOTE(review): HS_CPU_FEATURES_AVX2 looks like a CPU-feature bit
+        // rather than a pattern flag - confirm OR-ing it in here is intended.
+        if (NX86::HaveAVX2()) {
+            nativeFlags |= HS_CPU_FEATURES_AVX2;
+        }
+        if ((flags & FLAGS_CASELESS) != 0) {
+            nativeFlags |= HS_FLAG_CASELESS;
+        }
+        auto database = ::NHyperscan::Compile(regex, nativeFlags);
+        return std::make_unique<THyperscan>(std::move(database));
+    } catch (const ::NHyperscan::TCompileException& ex) {
+        ythrow TCompileException() << ex.what();
+    }
+}
+
+// Rebuilds a THyperscan from the blob stored in the Hyperscan field of `proto`.
+IRePtr Deserialize(const TSerialization& proto) {
+    auto database = ::NHyperscan::Deserialize(proto.GetHyperscan());
+    return std::make_unique<THyperscan>(std::move(database));
+}
+
+REGISTER_RE_LIB(TSerialization::kHyperscan, Compile, Deserialize)
+
+}
+}
diff --git a/ydb/library/rewrapper/proto/CMakeLists.txt b/ydb/library/rewrapper/proto/CMakeLists.txt
new file mode 100644
index 00000000000..dc09fc44056
--- /dev/null
+++ b/ydb/library/rewrapper/proto/CMakeLists.txt
@@ -0,0 +1,31 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-rewrapper-proto)
+target_link_libraries(library-rewrapper-proto PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(library-rewrapper-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/rewrapper/proto/serialization.proto
+)
+target_proto_addincls(library-rewrapper-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(library-rewrapper-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/ydb/library/rewrapper/proto/serialization.proto b/ydb/library/rewrapper/proto/serialization.proto
new file mode 100644
index 00000000000..922ec74b26e
--- /dev/null
+++ b/ydb/library/rewrapper/proto/serialization.proto
@@ -0,0 +1,15 @@
+syntax = "proto3";
+
+package NReWrapper;
+
+// Serialized form of an RE2-backed regex: the pattern text and the flags it
+// was compiled with. TRe2 recompiles the pattern from these fields.
+message TRe2Serialization {
+ string Regexp = 1;
+ uint64 Flags = 2;
+};
+
+// Envelope for a serialized regex. The dispatcher uses the case of the
+// populated oneof member (kHyperscan / kRe2) as the backend id.
+message TSerialization {
+ oneof Data {
+ bytes Hyperscan = 1;
+ TRe2Serialization Re2 = 2;
+ }
+};
diff --git a/ydb/library/rewrapper/re.h b/ydb/library/rewrapper/re.h
new file mode 100644
index 00000000000..3f564ad1ad5
--- /dev/null
+++ b/ydb/library/rewrapper/re.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <memory>
+
+#include <util/generic/fwd.h>
+#include <util/generic/yexception.h>
+
+namespace NReWrapper {
+
+// Thrown by the backend Compile() functions when a pattern is rejected.
+class TCompileException : public yexception {
+};
+
+// Backend-independent compile flags; values are tested as bits of a mask.
+enum EFlags {
+ FLAGS_CASELESS = 1, // request case-insensitive matching
+};
+
+// Backend-independent handle to a compiled regular expression.
+class IRe {
+public:
+ virtual ~IRe() = default;
+ // Returns whether the regex matches `text`.
+ virtual bool Matches(const TStringBuf& text) const = 0;
+ // Returns a byte string that NDispatcher::Deserialize() accepts.
+ virtual TString Serialize() const = 0;
+};
+
+using IRePtr = std::unique_ptr<IRe>;
+
+namespace NDispatcher {
+ // Compiles `regex` with the backend registered under `id` (callers pass a
+ // TSerialization case constant such as kRe2); `flags` is an EFlags bitmask.
+ IRePtr Compile(const TStringBuf& regex, unsigned int flags, ui32 id);
+ // Rebuilds a regex from bytes produced by IRe::Serialize(); the backend is
+ // chosen from the serialized data itself.
+ IRePtr Deserialize(const TStringBuf& serializedRegex);
+}
+
+}
diff --git a/ydb/library/rewrapper/re2/CMakeLists.txt b/ydb/library/rewrapper/re2/CMakeLists.txt
new file mode 100644
index 00000000000..f4496471740
--- /dev/null
+++ b/ydb/library/rewrapper/re2/CMakeLists.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-rewrapper-re2 INTERFACE)
+target_link_libraries(library-rewrapper-re2 INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-re2
+ ydb-library-rewrapper
+)
+
+add_global_library_for(library-rewrapper-re2.global library-rewrapper-re2)
+target_link_libraries(library-rewrapper-re2.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-re2
+ ydb-library-rewrapper
+)
+target_sources(library-rewrapper-re2.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/rewrapper/re2/re2.cpp
+)
diff --git a/ydb/library/rewrapper/re2/re2.cpp b/ydb/library/rewrapper/re2/re2.cpp
new file mode 100644
index 00000000000..7c8c28cd1d1
--- /dev/null
+++ b/ydb/library/rewrapper/re2/re2.cpp
@@ -0,0 +1,89 @@
+#include <ydb/library/rewrapper/re.h>
+#include <ydb/library/rewrapper/registrator.h>
+#include <ydb/library/rewrapper/proto/serialization.pb.h>
+#include <contrib/libs/re2/re2/re2.h>
+#include <util/charset/utf8.h>
+
+namespace NReWrapper {
+
+using namespace re2;
+
+namespace NRe2 {
+
+namespace {
+
+// Builds RE2 options for `regex`: UTF-8 encoding when UTF8Detect() reports
+// UTF8, Latin-1 otherwise; case-insensitive when FLAGS_CASELESS is set.
+RE2::Options CreateOptions(const TStringBuf& regex, unsigned int flags) {
+    const auto encoding = (UTF8Detect(regex) == UTF8)
+        ? RE2::Options::Encoding::EncodingUTF8
+        : RE2::Options::Encoding::EncodingLatin1;
+    const bool caseless = (flags & FLAGS_CASELESS) != 0;
+
+    RE2::Options options;
+    options.set_encoding(encoding);
+    options.set_case_sensitive(!caseless);
+    return options;
+}
+
+// IRe implementation backed by RE2. Keeps a TSerialization message holding
+// the pattern and flags so that Serialize() can emit it.
+class TRe2 : public IRe {
+public:
+    // Compiles `regex` and records pattern and flags for later serialization.
+    TRe2(const TStringBuf& regex, unsigned int flags)
+        : Regexp(StringPiece(regex.data(), regex.size()), CreateOptions(regex, flags))
+    {
+        auto* stored = RawRegexp.MutableRe2();
+        stored->set_regexp(TString(regex));
+        stored->set_flags(flags);
+    }
+
+    // Recompiles the pattern stored in the Re2 field of `proto` and keeps a
+    // copy of the whole message.
+    TRe2(const TSerialization& proto)
+        : Regexp(StringPiece(proto.GetRe2().GetRegexp().data(), proto.GetRe2().GetRegexp().size()),
+            CreateOptions(proto.GetRe2().GetRegexp(), proto.GetRe2().GetFlags()))
+        , RawRegexp(proto)
+    { }
+
+    // Unanchored search over the whole of `text`; no submatches are captured.
+    bool Matches(const TStringBuf& text) const override {
+        const size_t length = text.size();
+        return Regexp.Match(StringPiece(text.data(), length), 0, length, RE2::UNANCHORED, nullptr, 0);
+    }
+
+    // Serializes the stored message; Y_VERIFY guards against a failed write.
+    TString Serialize() const override {
+        TString bytes;
+        const bool serialized = RawRegexp.SerializeToString(&bytes);
+        Y_VERIFY(serialized);
+        return bytes;
+    }
+
+    // Reports whether RE2 accepted the pattern; on failure RE2's error text
+    // is copied into `*error`.
+    bool Ok(TString* error) const {
+        if (!Regexp.ok()) {
+            *error = Regexp.error();
+            return false;
+        }
+        return true;
+    }
+private:
+    RE2 Regexp;
+    TSerialization RawRegexp;
+};
+
+}
+
+// Compiles `regex` with RE2. Throws TCompileException carrying RE2's error
+// text when the pattern is rejected.
+IRePtr Compile(const TStringBuf& regex, unsigned int flags) {
+    auto compiled = std::make_unique<TRe2>(regex, flags);
+    if (TString reason; !compiled->Ok(&reason)) {
+        ythrow TCompileException() << reason;
+    }
+    return compiled;
+}
+
+// Recompiles the regex described by the Re2 field of `p`.
+// NOTE(review): unlike Compile(), TRe2::Ok() is not consulted here - confirm
+// that serialized patterns are always known to compile.
+IRePtr Deserialize(const TSerialization& p) {
+    IRePtr restored = std::make_unique<TRe2>(p);
+    return restored;
+}
+
+REGISTER_RE_LIB(TSerialization::kRe2, Compile, Deserialize)
+
+}
+
+}
diff --git a/ydb/library/rewrapper/registrator.h b/ydb/library/rewrapper/registrator.h
new file mode 100644
index 00000000000..724b529910d
--- /dev/null
+++ b/ydb/library/rewrapper/registrator.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+// Registers a regex backend by calling NRegistrator::AddLibrary(__VA_ARGS__)
+// from the constructor of a namespace-scope object in an unnamed namespace.
+// AddLibrary is named relative to the point of expansion; both visible uses
+// sit inside namespace NReWrapper. The helper type and object names are
+// fixed, so expanding the macro twice in the same scope would redefine them.
+#define REGISTER_RE_LIB(...) \
+ namespace { \
+ struct TReWrapperStaticRegistrator { \
+ inline TReWrapperStaticRegistrator() { \
+ NRegistrator::AddLibrary(__VA_ARGS__); \
+ } \
+ } RE_REGISTRATOR; \
+ }
+
+namespace NReWrapper {
+
+class IRe;
+class TSerialization;
+// NOTE(review): std::unique_ptr is used here while this header directly
+// includes only util/generic/fwd.h - confirm <memory> always arrives through
+// the includer, or include it here.
+using IRePtr = std::unique_ptr<IRe>;
+
+namespace NRegistrator {
+
+// Backend entry point: compiles a pattern given a flags bitmask.
+using TCompiler = IRePtr(*)(const TStringBuf&, unsigned int);
+// Backend entry point: rebuilds a regex from a TSerialization message.
+using TDeserializer = IRePtr(*)(const TSerialization&);
+
+// Registers a backend under the 1-based `id`; the visible backends call it
+// through REGISTER_RE_LIB.
+void AddLibrary(ui32 id, TCompiler compiler, TDeserializer deserializer);
+
+}
+}
diff --git a/ydb/library/rewrapper/ut/hyperscan_ut.cpp b/ydb/library/rewrapper/ut/hyperscan_ut.cpp
new file mode 100644
index 00000000000..c507d86d7ca
--- /dev/null
+++ b/ydb/library/rewrapper/ut/hyperscan_ut.cpp
@@ -0,0 +1,37 @@
+#include <ydb/library/rewrapper/re.h>
+#include <ydb/library/rewrapper/proto/serialization.pb.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/regex/hyperscan/hyperscan.h>
+
+namespace NReWrapper {
+namespace NDispatcher {
+
+// Dispatcher round-trip tests for the Hyperscan backend.
+Y_UNIT_TEST_SUITE(ReWrapperDispatcherTestHyperscan) {
+ // A blob produced directly by ::NHyperscan::Serialize() (no TSerialization
+ // envelope) must still be accepted by NDispatcher::Deserialize().
+ Y_UNIT_TEST(LegacySerialization) {
+ unsigned int hyperscanFlags = 0;
+ hyperscanFlags |= HS_FLAG_UTF8;
+ if (NX86::HaveAVX2()) {
+ hyperscanFlags |= HS_CPU_FEATURES_AVX2;
+ }
+ auto database = ::NHyperscan::Compile("[0-9]+", hyperscanFlags);
+ auto string = ::NHyperscan::Serialize(database);
+
+ auto wrapper = Deserialize(string);
+ UNIT_ASSERT_VALUES_EQUAL(wrapper->Matches("123"), true);
+ UNIT_ASSERT_VALUES_EQUAL(wrapper->Matches("abc"), false);
+ }
+ // Compile through the dispatcher, serialize, deserialize, and check that
+ // the original and the restored regex agree on the same inputs.
+ Y_UNIT_TEST(Serialization) {
+ auto w1 = Compile("[0-9]+", 0, NReWrapper::TSerialization::kHyperscan);
+ auto string = w1->Serialize();
+
+ auto w2 = Deserialize(string);
+ UNIT_ASSERT_VALUES_EQUAL(w1->Matches("123"), true);
+ UNIT_ASSERT_VALUES_EQUAL(w1->Matches("abc"), false);
+ UNIT_ASSERT_VALUES_EQUAL(w2->Matches("123"), true);
+ UNIT_ASSERT_VALUES_EQUAL(w2->Matches("abc"), false);
+ }
+}
+
+}
+}
diff --git a/ydb/library/rewrapper/ut/re2_ut.cpp b/ydb/library/rewrapper/ut/re2_ut.cpp
new file mode 100644
index 00000000000..99c47a96217
--- /dev/null
+++ b/ydb/library/rewrapper/ut/re2_ut.cpp
@@ -0,0 +1,23 @@
+#include <ydb/library/rewrapper/re.h>
+#include <ydb/library/rewrapper/proto/serialization.pb.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+namespace NReWrapper {
+namespace NDispatcher {
+
+// Dispatcher round-trip test for the RE2 backend.
+Y_UNIT_TEST_SUITE(ReWrapperDispatcherRe2) {
+ // Compile through the dispatcher, serialize, deserialize, and check that
+ // the original and the restored regex agree on the same inputs.
+ Y_UNIT_TEST(Serialization) {
+ auto w1 = Compile("[0-9]+", 0, NReWrapper::TSerialization::kRe2);
+ auto string = w1->Serialize();
+
+ auto w2 = Deserialize(string);
+ UNIT_ASSERT_VALUES_EQUAL(w1->Matches("123"), true);
+ UNIT_ASSERT_VALUES_EQUAL(w1->Matches("abc"), false);
+ UNIT_ASSERT_VALUES_EQUAL(w2->Matches("123"), true);
+ UNIT_ASSERT_VALUES_EQUAL(w2->Matches("abc"), false);
+ }
+}
+
+}
+}
diff --git a/ydb/library/yql/minikql/jsonpath/CMakeLists.darwin.txt b/ydb/library/yql/minikql/jsonpath/CMakeLists.darwin.txt
new file mode 100644
index 00000000000..2d199d2573b
--- /dev/null
+++ b/ydb/library/yql/minikql/jsonpath/CMakeLists.darwin.txt
@@ -0,0 +1,50 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(benchmark)
+
+add_library(yql-minikql-jsonpath)
+target_compile_options(yql-minikql-jsonpath PRIVATE
+ -DYDB_REWRAPPER_LIB_ID=kHyperscan
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=18
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(yql-minikql-jsonpath PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-rewrapper-hyperscan
+ contrib-libs-double-conversion
+ library-cpp-json
+ library-rewrapper-re2
+ ydb-library-rewrapper
+ ydb-library-binary_json
+ yql-minikql-dom
+ yql-public-issue
+ yql-public-udf
+ library-yql-utils
+ core-issue-protos
+ yql-parser-proto_ast
+ proto_ast-gen-jsonpath
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(yql-minikql-jsonpath PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_builder.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/binary.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/executor.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/jsonpath.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/parse_double.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/type_check.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/value.cpp
+)
+generate_enum_serilization(yql-minikql-jsonpath
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_nodes.h
+ INCLUDE_HEADERS
+ ydb/library/yql/minikql/jsonpath/ast_nodes.h
+)
diff --git a/ydb/library/yql/minikql/jsonpath/CMakeLists.linux-aarch64.txt b/ydb/library/yql/minikql/jsonpath/CMakeLists.linux-aarch64.txt
new file mode 100644
index 00000000000..dea5452b8cb
--- /dev/null
+++ b/ydb/library/yql/minikql/jsonpath/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,49 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(benchmark)
+
+add_library(yql-minikql-jsonpath)
+target_compile_options(yql-minikql-jsonpath PRIVATE
+ -DYDB_REWRAPPER_LIB_ID=kRe2
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=18
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(yql-minikql-jsonpath PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-double-conversion
+ library-cpp-json
+ library-rewrapper-re2
+ ydb-library-rewrapper
+ ydb-library-binary_json
+ yql-minikql-dom
+ yql-public-issue
+ yql-public-udf
+ library-yql-utils
+ core-issue-protos
+ yql-parser-proto_ast
+ proto_ast-gen-jsonpath
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(yql-minikql-jsonpath PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_builder.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/binary.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/executor.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/jsonpath.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/parse_double.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/type_check.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/value.cpp
+)
+generate_enum_serilization(yql-minikql-jsonpath
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_nodes.h
+ INCLUDE_HEADERS
+ ydb/library/yql/minikql/jsonpath/ast_nodes.h
+)
diff --git a/ydb/library/yql/minikql/jsonpath/CMakeLists.linux.txt b/ydb/library/yql/minikql/jsonpath/CMakeLists.linux.txt
new file mode 100644
index 00000000000..2d199d2573b
--- /dev/null
+++ b/ydb/library/yql/minikql/jsonpath/CMakeLists.linux.txt
@@ -0,0 +1,50 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(benchmark)
+
+add_library(yql-minikql-jsonpath)
+target_compile_options(yql-minikql-jsonpath PRIVATE
+ -DYDB_REWRAPPER_LIB_ID=kHyperscan
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=18
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(yql-minikql-jsonpath PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-rewrapper-hyperscan
+ contrib-libs-double-conversion
+ library-cpp-json
+ library-rewrapper-re2
+ ydb-library-rewrapper
+ ydb-library-binary_json
+ yql-minikql-dom
+ yql-public-issue
+ yql-public-udf
+ library-yql-utils
+ core-issue-protos
+ yql-parser-proto_ast
+ proto_ast-gen-jsonpath
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(yql-minikql-jsonpath PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_builder.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/binary.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/executor.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/jsonpath.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/parse_double.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/type_check.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/value.cpp
+)
+generate_enum_serilization(yql-minikql-jsonpath
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_nodes.h
+ INCLUDE_HEADERS
+ ydb/library/yql/minikql/jsonpath/ast_nodes.h
+)
diff --git a/ydb/library/yql/minikql/jsonpath/CMakeLists.txt b/ydb/library/yql/minikql/jsonpath/CMakeLists.txt
index b37111afa63..3e0811fb22e 100644
--- a/ydb/library/yql/minikql/jsonpath/CMakeLists.txt
+++ b/ydb/library/yql/minikql/jsonpath/CMakeLists.txt
@@ -6,42 +6,10 @@
# original buildsystem will not be accepted.
-add_subdirectory(benchmark)
-
-add_library(yql-minikql-jsonpath)
-target_compile_options(yql-minikql-jsonpath PRIVATE
- -DUDF_ABI_VERSION_MAJOR=2
- -DUDF_ABI_VERSION_MINOR=18
- -DUDF_ABI_VERSION_PATCH=0
-)
-target_link_libraries(yql-minikql-jsonpath PUBLIC
- contrib-libs-cxxsupp
- yutil
- contrib-libs-double-conversion
- library-cpp-json
- cpp-regex-hyperscan
- ydb-library-binary_json
- yql-minikql-dom
- yql-public-issue
- yql-public-udf
- library-yql-utils
- core-issue-protos
- yql-parser-proto_ast
- proto_ast-gen-jsonpath
- tools-enum_parser-enum_serialization_runtime
-)
-target_sources(yql-minikql-jsonpath PRIVATE
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_builder.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/binary.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/executor.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/jsonpath.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/parse_double.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/type_check.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/value.cpp
-)
-generate_enum_serilization(yql-minikql-jsonpath
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/jsonpath/ast_nodes.h
- INCLUDE_HEADERS
- ydb/library/yql/minikql/jsonpath/ast_nodes.h
-)
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (APPLE)
+ include(CMakeLists.darwin.txt)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux.txt)
+endif()
diff --git a/ydb/library/yql/minikql/jsonpath/ast_builder.cpp b/ydb/library/yql/minikql/jsonpath/ast_builder.cpp
index c13a2264c19..f6bed23d82c 100644
--- a/ydb/library/yql/minikql/jsonpath/ast_builder.cpp
+++ b/ydb/library/yql/minikql/jsonpath/ast_builder.cpp
@@ -3,8 +3,7 @@
#include "parse_double.h"
#include <ydb/library/yql/core/issue/protos/issue_id.pb.h>
-
-#include <library/cpp/regex/hyperscan/hyperscan.h>
+#include <ydb/library/rewrapper/proto/serialization.pb.h>
#include <util/generic/singleton.h>
#include <util/system/compiler.h>
@@ -18,10 +17,12 @@
using namespace NYql;
using namespace NYql::NJsonPath;
using namespace NJsonPathGenerated;
-using namespace NHyperscan;
+using namespace NReWrapper;
namespace {
+constexpr ui32 RegexpLibId = NReWrapper::TSerialization::YDB_REWRAPPER_LIB_ID;
+
TPosition GetPos(const TToken& token) {
return TPosition(token.GetColumn(), token.GetLine());
}
@@ -267,7 +268,7 @@ TAstNodePtr TAstBuilder::BuildLikeRegexExpr(const TRule_like_regex_expr& node, T
for (char flag : flags) {
switch (flag) {
case 'i':
- parsedFlags |= HS_FLAG_CASELESS;
+ parsedFlags |= FLAGS_CASELESS;
break;
default:
Error(GetPos(flagsToken), TStringBuilder() << "Unsupported regex flag '" << flag << "'");
@@ -275,17 +276,11 @@ TAstNodePtr TAstBuilder::BuildLikeRegexExpr(const TRule_like_regex_expr& node, T
}
}
}
- if (UTF8Detect(regex)) {
- parsedFlags |= HS_FLAG_UTF8;
- }
- if (NX86::HaveAVX2()) {
- parsedFlags |= HS_CPU_FEATURES_AVX2;
- }
- TDatabase compiledRegex;
+ IRePtr compiledRegex;
try {
- compiledRegex = Compile(regex, parsedFlags);
- } catch (const TCompileException& e) {
+ compiledRegex = NDispatcher::Compile(regex, parsedFlags, RegexpLibId);
+ } catch (const NReWrapper::TCompileException& e) {
Error(GetPos(regexToken), e.AsStrBuf());
return nullptr;
}
@@ -480,3 +475,11 @@ TAstNodePtr TAstBuilder::BuildJsonPath(const TRule_jsonpath& node) {
TAstNodePtr TAstBuilder::Build(const TJsonPathParserAST& ast) {
return BuildJsonPath(ast.GetRule_jsonpath());
}
+
+namespace NYql::NJsonPath {
+
+ui32 GetReLibId() {
+ return RegexpLibId;
+}
+
+}
diff --git a/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp b/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp
index 0e7dee2e22e..5a51c2e90e2 100644
--- a/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp
+++ b/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp
@@ -357,7 +357,7 @@ void TIsUnknownPredicateNode::Accept(IAstNodeVisitor& visitor) const {
return visitor.VisitIsUnknownPredicate(*this);
}
-TLikeRegexPredicateNode::TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NHyperscan::TDatabase&& regex)
+TLikeRegexPredicateNode::TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NReWrapper::IRePtr&& regex)
: TAstNode(pos)
, Input(input)
, Regex(std::move(regex))
@@ -368,7 +368,7 @@ const TAstNodePtr TLikeRegexPredicateNode::GetInput() const {
return Input;
}
-const NHyperscan::TDatabase& TLikeRegexPredicateNode::GetRegex() const {
+const NReWrapper::IRePtr& TLikeRegexPredicateNode::GetRegex() const {
return Regex;
}
@@ -380,4 +380,4 @@ void TLikeRegexPredicateNode::Accept(IAstNodeVisitor& visitor) const {
return visitor.VisitLikeRegexPredicate(*this);
}
-} \ No newline at end of file
+}
diff --git a/ydb/library/yql/minikql/jsonpath/ast_nodes.h b/ydb/library/yql/minikql/jsonpath/ast_nodes.h
index f9129ef1dd5..863e3c4fa26 100644
--- a/ydb/library/yql/minikql/jsonpath/ast_nodes.h
+++ b/ydb/library/yql/minikql/jsonpath/ast_nodes.h
@@ -3,7 +3,7 @@
#include <ydb/library/yql/public/issue/yql_issue.h>
#include <library/cpp/json/json_value.h>
-#include <library/cpp/regex/hyperscan/hyperscan.h>
+#include <ydb/library/rewrapper/re.h>
namespace NYql::NJsonPath {
@@ -383,11 +383,11 @@ private:
class TLikeRegexPredicateNode : public TAstNode {
public:
- TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NHyperscan::TDatabase&& regex);
+ TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NReWrapper::IRePtr&& regex);
const TAstNodePtr GetInput() const;
- const NHyperscan::TDatabase& GetRegex() const;
+ const NReWrapper::IRePtr& GetRegex() const;
EReturnType GetReturnType() const override;
@@ -395,7 +395,7 @@ public:
private:
TAstNodePtr Input;
- NHyperscan::TDatabase Regex;
+ NReWrapper::IRePtr Regex;
};
}
diff --git a/ydb/library/yql/minikql/jsonpath/benchmark/main.cpp b/ydb/library/yql/minikql/jsonpath/benchmark/main.cpp
index a3a1f10f2d6..613b1639dbc 100644
--- a/ydb/library/yql/minikql/jsonpath/benchmark/main.cpp
+++ b/ydb/library/yql/minikql/jsonpath/benchmark/main.cpp
@@ -62,13 +62,17 @@ TString GenerateRandomJson() {
const size_t MAX_PARSE_ERRORS = 100;
+#define PREPARE() \
+ TIntrusivePtr<IFunctionRegistry> FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry())); \
+ TScopedAlloc Alloc(__LOCATION__); \
+ TTypeEnvironment Env(Alloc); \
+ TMemoryUsageInfo MemInfo("Memory"); \
+ THolderFactory HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get()); \
+ TDefaultValueBuilder ValueBuilder(HolderFactory); \
+
+
Y_CPU_BENCHMARK(JsonPath, iface) {
- TIntrusivePtr<IFunctionRegistry> FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry()));
- TScopedAlloc Alloc(__LOCATION__);
- TTypeEnvironment Env(Alloc);
- TMemoryUsageInfo MemInfo("Memory");
- THolderFactory HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get());
- TDefaultValueBuilder ValueBuilder(HolderFactory);
+ PREPARE()
const TString json = GenerateRandomJson();
const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder);
@@ -80,3 +84,31 @@ Y_CPU_BENCHMARK(JsonPath, iface) {
Y_VERIFY(!result.IsError());
}
}
+
+Y_CPU_BENCHMARK(JsonPathLikeRegexWithCompile, iface) {
+ PREPARE()
+
+ const TString json = GenerateRandomJson();
+ const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder);
+
+ for (size_t i = 0; i < iface.Iterations(); i++) {
+ TIssues issues;
+ const auto jsonPath = ParseJsonPath("$[*] like_regex \"[0-9]+\"", issues, MAX_PARSE_ERRORS);
+ const auto result = ExecuteJsonPath(jsonPath, TValue(dom), TVariablesMap(), &ValueBuilder);
+ Y_VERIFY(!result.IsError());
+ }
+}
+
+Y_CPU_BENCHMARK(JsonPathLikeRegex, iface) {
+ PREPARE()
+
+ const TString json = GenerateRandomJson();
+ const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder);
+
+ TIssues issues;
+ const auto jsonPath = ParseJsonPath("$[*] like_regex \"[0-9]+\"", issues, MAX_PARSE_ERRORS);
+ for (size_t i = 0; i < iface.Iterations(); i++) {
+ const auto result = ExecuteJsonPath(jsonPath, TValue(dom), TVariablesMap(), &ValueBuilder);
+ Y_VERIFY(!result.IsError());
+ }
+}
diff --git a/ydb/library/yql/minikql/jsonpath/binary.cpp b/ydb/library/yql/minikql/jsonpath/binary.cpp
index 1d159599b85..cb6421739d0 100644
--- a/ydb/library/yql/minikql/jsonpath/binary.cpp
+++ b/ydb/library/yql/minikql/jsonpath/binary.cpp
@@ -36,8 +36,8 @@ TStartsWithPrefixOffset TJsonPathItem::GetStartsWithPrefixOffset() const {
return std::get<TStartsWithPrefixOffset>(Data);
}
-const THyperscanRegex& TJsonPathItem::GetRegex() const {
- return std::get<THyperscanRegex>(Data);
+const NReWrapper::IRePtr& TJsonPathItem::GetRegex() const {
+ return std::get<NReWrapper::IRePtr>(Data);
}
TJsonPathReader::TJsonPathReader(const TJsonPathPtr path)
@@ -161,9 +161,8 @@ const TJsonPathItem& TJsonPathReader::ReadFromPos(TUint pos) {
case EJsonPathItemType::LikeRegexPredicate: {
const auto serializedRegex = ReadString(pos);
- THyperscanRegex regex;
- regex.Regex = NHyperscan::Deserialize(serializedRegex);
- regex.Scratch = NHyperscan::MakeScratch(regex.Regex);
+
+ auto regex = NReWrapper::NDispatcher::Deserialize(serializedRegex);
result.Data = std::move(regex);
result.InputItemOffset = ReadUint(pos);
break;
@@ -478,7 +477,7 @@ void TJsonPathBuilder::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& no
WriteType(EJsonPathItemType::LikeRegexPredicate);
WritePos(node);
- const TString serializedRegex = NHyperscan::Serialize(node.GetRegex());
+ const TString serializedRegex = node.GetRegex()->Serialize();
WriteString(serializedRegex);
WriteNextPosition();
diff --git a/ydb/library/yql/minikql/jsonpath/binary.h b/ydb/library/yql/minikql/jsonpath/binary.h
index 29ae84c741c..946fe7cdbb3 100644
--- a/ydb/library/yql/minikql/jsonpath/binary.h
+++ b/ydb/library/yql/minikql/jsonpath/binary.h
@@ -2,7 +2,7 @@
#include "ast_nodes.h"
-#include <library/cpp/regex/hyperscan/hyperscan.h>
+#include <ydb/library/rewrapper/re.h>
#include <util/system/unaligned_mem.h>
#include <util/generic/buffer.h>
@@ -84,11 +84,6 @@ struct TStartsWithPrefixOffset {
TUint Offset = 0;
};
-struct THyperscanRegex {
- NHyperscan::TDatabase Regex;
- NHyperscan::TScratch Scratch;
-};
-
struct TJsonPathItem {
// Position in the source jsonpath
TPosition Pos;
@@ -109,7 +104,7 @@ struct TJsonPathItem {
TBinaryOpArgumentsOffset,
TFilterPredicateOffset,
TStartsWithPrefixOffset,
- THyperscanRegex,
+ NReWrapper::IRePtr,
double,
bool
> Data;
@@ -117,7 +112,7 @@ struct TJsonPathItem {
const TStringBuf GetString() const;
const TVector<TArraySubscriptOffsets>& GetSubscripts() const;
const TBinaryOpArgumentsOffset& GetBinaryOpArguments() const;
- const THyperscanRegex& GetRegex() const;
+ const NReWrapper::IRePtr& GetRegex() const;
double GetNumber() const;
bool GetBoolean() const;
TFilterPredicateOffset GetFilterPredicateOffset() const;
@@ -277,4 +272,4 @@ private:
THashMap<TUint, TJsonPathItem> ItemCache;
};
-} \ No newline at end of file
+}
diff --git a/ydb/library/yql/minikql/jsonpath/executor.cpp b/ydb/library/yql/minikql/jsonpath/executor.cpp
index a559a38278b..a13d4523d39 100644
--- a/ydb/library/yql/minikql/jsonpath/executor.cpp
+++ b/ydb/library/yql/minikql/jsonpath/executor.cpp
@@ -4,8 +4,6 @@
#include <ydb/library/yql/core/issue/protos/issue_id.pb.h>
#include <ydb/library/yql/minikql/dom/node.h>
-#include <library/cpp/regex/hyperscan/hyperscan.h>
-
#include <util/generic/scope.h>
#include <util/generic/maybe.h>
#include <util/system/compiler.h>
@@ -17,7 +15,6 @@ namespace NYql::NJsonPath {
using namespace NJson;
using namespace NUdf;
using namespace NDom;
-using namespace NHyperscan;
namespace {
@@ -1008,7 +1005,7 @@ TResult TExecutor::LikeRegexPredicate(const TJsonPathItem& item) {
bool found = false;
for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
if (node.IsString()) {
- found |= Matches(regex.Regex, regex.Scratch, node.GetString());
+ found |= regex->Matches(node.GetString());
} else {
error = true;
}
diff --git a/ydb/library/yql/minikql/jsonpath/ut/lib_id_ut.cpp b/ydb/library/yql/minikql/jsonpath/ut/lib_id_ut.cpp
new file mode 100644
index 00000000000..0d48e57c76a
--- /dev/null
+++ b/ydb/library/yql/minikql/jsonpath/ut/lib_id_ut.cpp
@@ -0,0 +1,23 @@
+#include <library/cpp/testing/unittest/registar.h>
+#include <ydb/library/rewrapper/proto/serialization.pb.h>
+#include <util/system/platform.h>
+
+/*
+ * Paranoid test to check that the correct regexp library is used
+ */
+
+namespace NYql::NJsonPath {
+
+extern ui32 GetReLibId();
+
+Y_UNIT_TEST_SUITE(RegexpLib) {
+ Y_UNIT_TEST(DefaultLib) {
+#ifdef __x86_64__
+ UNIT_ASSERT_VALUES_EQUAL(GetReLibId(), (ui32)NReWrapper::TSerialization::kHyperscan);
+#else
+ UNIT_ASSERT_VALUES_EQUAL(GetReLibId(), (ui32)NReWrapper::TSerialization::kRe2);
+#endif
+ }
+}
+
+}