summaryrefslogtreecommitdiffstats
path: root/yql/essentials/minikql/jsonpath/rewrapper/re2/re2.cpp
blob: 2c97b85123b2f0141df8cbdebf54f2ec29aee12f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
#include <yql/essentials/minikql/jsonpath/rewrapper/registrator.h>
#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
#include <contrib/libs/re2/re2/re2.h>
#include <util/charset/utf8.h>

namespace NReWrapper {

using namespace re2;

namespace NRe2 {

namespace {

// Builds the RE2 options used to compile `regex`.
//
// Encoding: UTF-8 when UTF8Detect() reports the pattern as UTF8, Latin-1 in
// every other case.
// NOTE(review): presumably a pure-ASCII pattern is not reported as UTF8 and is
// therefore compiled as Latin-1 — confirm against UTF8Detect().
//
// Case sensitivity: disabled when FLAGS_CASELESS is set in `flags`.
RE2::Options CreateOptions(const TStringBuf& regex, unsigned int flags) {
    const bool patternIsUtf8 = UTF8Detect(regex) == UTF8;
    const bool caseless = (flags & FLAGS_CASELESS) != 0;

    RE2::Options result;
    if (patternIsUtf8) {
        result.set_encoding(RE2::Options::Encoding::EncodingUTF8);
    } else {
        result.set_encoding(RE2::Options::Encoding::EncodingLatin1);
    }
    result.set_case_sensitive(!caseless);
    return result;
}

class TRe2: public IRe {
public:
    TRe2(const TStringBuf& regex, unsigned int flags)
        : Regexp_(StringPiece(regex.data(), regex.size()), CreateOptions(regex, flags))
    {
        auto re2 = RawRegexp_.MutableRe2();
        re2->set_regexp(TString(regex));
        re2->set_flags(flags);
    }

    TRe2(const TSerialization& proto)
        : Regexp_(StringPiece(proto.GetRe2().GetRegexp().data(), proto.GetRe2().GetRegexp().size()),
                  CreateOptions(proto.GetRe2().GetRegexp(), proto.GetRe2().GetFlags()))
        , RawRegexp_(proto)
    {
    }

    bool Matches(const TStringBuf& text) const override {
        const StringPiece piece(text.data(), text.size());
        RE2::Anchor anchor = RE2::UNANCHORED;

        return Regexp_.Match(piece, 0, text.size(), anchor, nullptr, 0);
    }

    TString Serialize() const override {
        TString data;
        auto res = RawRegexp_.SerializeToString(&data);
        Y_ABORT_UNLESS(res);
        return data;
    }

    bool Ok(TString* error) const {
        if (Regexp_.ok()) {
            return true;
        } else {
            *error = Regexp_.error();
            return false;
        }
    }

private:
    RE2 Regexp_;
    TSerialization RawRegexp_;
};

} // namespace

// Compiles `regex` with the given `flags` into an RE2-backed IRe.
// Throws TCompileException carrying the RE2 error text when the pattern does
// not compile.
IRePtr Compile(const TStringBuf& regex, unsigned int flags) {
    auto compiled = std::make_unique<TRe2>(regex, flags);

    TString failure;
    const bool valid = compiled->Ok(&failure);
    if (!valid) {
        ythrow TCompileException() << failure;
    }

    return compiled;
}

// Rebuilds an RE2-backed IRe from a previously serialized message by
// recompiling the stored pattern with the stored flags.
// Unlike Compile(), no validity check is performed on the recompiled regex.
IRePtr Deserialize(const TSerialization& p) {
    auto restored = std::make_unique<TRe2>(p);
    return restored;
}

// Associates this backend's Compile/Deserialize entry points with the
// TSerialization::kRe2 tag.
// NOTE(review): the registration mechanics are defined by REGISTER_RE_LIB in
// registrator.h, which is not visible here — confirm there.
REGISTER_RE_LIB(TSerialization::kRe2, Compile, Deserialize)

} // namespace NRe2

} // namespace NReWrapper