blob: a4c0fa3a13aa8ba58675ffe7463969844662362e (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
#include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
#include <yql/essentials/minikql/jsonpath/rewrapper/registrator.h>
#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
#include <library/cpp/regex/hyperscan/hyperscan.h>
#include <util/charset/utf8.h>
namespace NReWrapper {
namespace NHyperscan {
namespace {
// IRe implementation that delegates matching and serialization to a
// compiled ::NHyperscan database owned by the instance.
// Declared final: it lives in an anonymous namespace and is only ever
// created through std::make_unique in this file.
class THyperscan final : public IRe {
public:
    // Takes ownership of an already built database.
    // explicit, so a TDatabase never converts into a matcher implicitly.
    explicit THyperscan(::NHyperscan::TDatabase&& db)
        : Database_(std::move(db))
    { }

    // Returns the result of ::NHyperscan::Matches for `text` against the
    // owned database.
    // The scratch object is created on the first call and reused afterwards;
    // this is why Scratch_ is mutable in a const method.
    // No synchronization guards the lazy creation.
    // NOTE(review): concurrent Matches() calls on one instance would race on
    // Scratch_ -- confirm callers never share an instance across threads.
    bool Matches(const TStringBuf& text) const override {
        if (!Scratch_) {
            Scratch_ = ::NHyperscan::MakeScratch(Database_);
        }
        return ::NHyperscan::Matches(Database_, Scratch_, text);
    }

    // Returns the database as produced by ::NHyperscan::Serialize, without
    // wrapping it into a TSerialization message.
    TString Serialize() const override {
        // Compatibility with old versions
        return ::NHyperscan::Serialize(Database_);
        /*
         * Proto-wrapped variant, kept for reference:
         *
         * TSerialization proto;
         * proto.SetHyperscan(::NHyperscan::Serialize(Database_));
         * TString data;
         * auto res = proto.SerializeToString(&data);
         * Y_ABORT_UNLESS(res);
         * return data;
         */
    }

private:
    ::NHyperscan::TDatabase Database_;
    // Lazily initialised by Matches(); empty until the first match attempt.
    mutable ::NHyperscan::TScratch Scratch_;
};
}
// Builds a Hyperscan-backed IRe from `regex`.
//
// Of the caller-supplied `flags`, only FLAGS_CASELESS is examined here; it is
// translated to HS_FLAG_CASELESS. HS_FLAG_UTF8 is added when UTF8Detect()
// reports a truthy result for the pattern text.
//
// A ::NHyperscan::TCompileException raised while building the database is
// rethrown as this library's TCompileException carrying the same what() text.
IRePtr Compile(const TStringBuf& regex, unsigned int flags) {
    try {
        unsigned int hsFlags = (flags & FLAGS_CASELESS) ? HS_FLAG_CASELESS : 0;

        if (UTF8Detect(regex)) {
            hsFlags |= HS_FLAG_UTF8;
        }

        // NOTE(review): HS_CPU_FEATURES_AVX2 is documented by Hyperscan as a
        // CPU feature bit (hs_platform_info_t::cpu_features), not a pattern
        // flag. OR-ing it into the pattern flags looks unintended: its numeric
        // value appears to coincide with a HS_FLAG_* bit, which would make
        // matching semantics depend on the host CPU. Behaviour is kept as is
        // because existing queries and serialized databases may rely on it --
        // confirm and fix deliberately.
        if (NX86::HaveAVX2()) {
            hsFlags |= HS_CPU_FEATURES_AVX2;
        }

        auto database = ::NHyperscan::Compile(regex, hsFlags);
        return std::make_unique<THyperscan>(std::move(database));
    } catch (const ::NHyperscan::TCompileException& error) {
        ythrow TCompileException() << error.what();
    }
}
// Restores a Hyperscan-backed IRe from the hyperscan payload stored in
// `proto` (read via proto.GetHyperscan()).
IRePtr Deserialize(const TSerialization& proto) {
    auto database = ::NHyperscan::Deserialize(proto.GetHyperscan());
    return std::make_unique<THyperscan>(std::move(database));
}
// Passes this backend's Compile/Deserialize pair to REGISTER_RE_LIB together
// with the TSerialization::kHyperscan case.
// NOTE(review): the macro is presumably defined in registrator.h and makes the
// pair discoverable by the rewrapper dispatcher -- see that header for details.
REGISTER_RE_LIB(TSerialization::kHyperscan, Compile, Deserialize)
}
}
|