diff options
| author | Alexander Smirnov <[email protected]> | 2025-01-11 00:21:49 +0000 | 
|---|---|---|
| committer | Alexander Smirnov <[email protected]> | 2025-01-11 00:21:49 +0000 | 
| commit | 457aacf7daabd8837feef98d1edcfe62420a1f47 (patch) | |
| tree | 3f8ca7735aac2ab4574833bf4ea5e1881a02ef84 /library/cpp/regex | |
| parent | af411bb10f1133d6e7f4c6324a89dde2f745d675 (diff) | |
| parent | 2d3b7f1966f9716551a0d7db72a9608addab8ecf (diff) | |
Merge branch 'rightlib' into merge-libs-250111-0020
Diffstat (limited to 'library/cpp/regex')
| -rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.cpp | 107 | ||||
| -rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.h | 26 | ||||
| -rw-r--r-- | library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 59 | 
3 files changed, 192 insertions, 0 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp
index 0a4bfcb9ec5..bfb9d7b5ff9 100644
--- a/library/cpp/regex/hyperscan/hyperscan.cpp
+++ b/library/cpp/regex/hyperscan/hyperscan.cpp
@@ -96,6 +96,27 @@ namespace NHyperscan {
             return db;
         }
 
+        TDatabase CompileLiteral(const TStringBuf& literal, unsigned int flags, hs_platform_info_t* platform) { // compiles via hs_compile_lit in block mode
+            hs_database_t* rawDb = nullptr;
+            hs_compile_error_t* rawCompileErr = nullptr;
+            hs_error_t status = hs_compile_lit(
+                    literal.data(),
+                    flags,
+                    literal.size(), // explicit length: the TStringBuf is not required to be NUL-terminated
+                    HS_MODE_BLOCK,
+                    platform,
+                    &rawDb,
+                    &rawCompileErr);
+            TDatabase db(rawDb); // wrap both raw results before any throw below
+            NHyperscan::TCompileError compileError(rawCompileErr);
+            if (status != HS_SUCCESS) {
+                ythrow TCompileException()
+                        << "Failed to compile literal: " << literal << ". "
+                        << "Error message (hyperscan): " << compileError->message;
+            }
+            return db;
+        }
+
         TDatabase CompileMulti(
                 const TVector<const char*>& regexs,
                 const TVector<unsigned int>& flags,
@@ -150,6 +171,61 @@ namespace NHyperscan {
             return db;
         }
 
+        TDatabase CompileMultiLiteral( // compiles via hs_compile_lit_multi in block mode; all four vectors must have equal sizes
+            const TVector<const char*>& literals,
+            const TVector<unsigned int>& flags,
+            const TVector<unsigned int>& ids,
+            const TVector<size_t>& lens, // lens[i] is the length handed to hyperscan for literals[i]
+            hs_platform_info_t* platform)
+        {
+            unsigned int count = literals.size(); // reference size for the three checks below
+            if (flags.size() != count) {
+                ythrow yexception()
+                        << "Mismatch of sizes vectors passed to CompileMultiLiteral. "
+                        << "size(literals) = " << literals.size() << ". "
+                        << "size(flags) = " << flags.size() << ".";
+            }
+            if (ids.size() != count) {
+                ythrow yexception()
+                        << "Mismatch of sizes vectors passed to CompileMultiLiteral. "
+                        << "size(literals) = " << literals.size() << ". "
+                        << "size(ids) = " << ids.size() << ".";
+            }
+            if (lens.size() != count) {
+                ythrow yexception()
+                        << "Mismatch of sizes vectors passed to CompileMultiLiteral. "
+                        << "size(literals) = " << literals.size() << ". "
+                        << "size(lens) = " << lens.size() << ".";
+            }
+            hs_database_t* rawDb = nullptr;
+            hs_compile_error_t* rawCompileErr = nullptr;
+            hs_error_t status = hs_compile_lit_multi(
+                    literals.data(),
+                    flags.data(),
+                    ids.data(),
+                    lens.data(),
+                    count,
+                    HS_MODE_BLOCK,
+                    platform,
+                    &rawDb,
+                    &rawCompileErr);
+            TDatabase db(rawDb); // wrap both raw results before any throw below
+            NHyperscan::TCompileError compileError(rawCompileErr);
+            if (status != HS_SUCCESS) {
+                if (compileError->expression >= 0) { // non-negative value is used as the index of the offending literal
+                    const TStringBuf literal(literals[compileError->expression], lens[compileError->expression]); // delimited by lens, not by '\0': never stream the raw pointer
+                    ythrow TCompileException()
+                            << "Failed to compile literal: " << literal << ". "
+                            << "Error message (hyperscan): " << compileError->message;
+                } else {
+                    ythrow TCompileException()
+                            << "Failed to compile multiple literals. "
+                            << "Error message (hyperscan): " << compileError->message;
+                }
+            }
+            return db;
+        }
+
         bool Matches(
                 const TDatabase& db,
                 const TScratch& scratch,
@@ -180,6 +256,16 @@ namespace NHyperscan {
         return NPrivate::Compile(regex, flags, &platformInfo);
     }
 
+    TDatabase CompileLiteral(const TStringBuf& literal, unsigned int flags) {
+        auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
+        return NPrivate::CompileLiteral(literal, flags, &platformInfo);
+    }
+
+    TDatabase CompileLiteral(const TStringBuf& literal, unsigned int flags, TCPUFeatures cpuFeatures) {
+        auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
+        return NPrivate::CompileLiteral(literal, flags, &platformInfo);
+    }
+
     TDatabase CompileMulti(
             const TVector<const char*>& regexs,
             const TVector<unsigned int>& flags,
@@ -201,6 +287,27 @@ namespace NHyperscan {
         return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
     }
 
+    TDatabase CompileMultiLiteral(
+        const TVector<const char*>& literals,
+        const TVector<unsigned int>& flags,
+        const TVector<unsigned int>& ids,
+        const TVector<size_t>& lens)
+    {
+        auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
+        return NPrivate::CompileMultiLiteral(literals, flags, ids, lens, &platformInfo);
+    }
+
+    TDatabase CompileMultiLiteral(
+        const TVector<const char*>& literals,
+        const TVector<unsigned int>& flags,
+        const TVector<unsigned int>& ids,
+        const TVector<size_t>& lens,
+        TCPUFeatures cpuFeatures)
+    {
+        auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
+        return NPrivate::CompileMultiLiteral(literals, flags, ids, lens, &platformInfo);
+    }
+
     TScratch MakeScratch(const TDatabase& db) {
         hs_scratch_t* rawScratch = nullptr;
         hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h
index eae82fa3842..7f8c877b076 100644
--- a/library/cpp/regex/hyperscan/hyperscan.h
+++ b/library/cpp/regex/hyperscan/hyperscan.h
@@ -60,6 +60,8 @@ namespace NHyperscan {
 
         TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);
 
+        TDatabase CompileLiteral(const TStringBuf& literal, unsigned int flags, hs_platform_info_t* platform); // throws TCompileException on failure
+
         TDatabase CompileMulti(
             const TVector<const char*>& regexs,
             const TVector<unsigned int>& flags,
@@ -67,6 +69,13 @@ namespace NHyperscan {
             hs_platform_info_t* platform,
             const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
 
+        TDatabase CompileMultiLiteral( // throws yexception on vector size mismatch, TCompileException on compile failure
+            const TVector<const char*>& literals,
+            const TVector<unsigned int>& flags,
+            const TVector<unsigned int>& ids,
+            const TVector<size_t>& lens, // lens[i] is the length of literals[i]
+            hs_platform_info_t* platform);
+
         // We need to parametrize Scan and Matches functions for testing purposes
         template<typename TCallback>
         void Scan(
@@ -118,6 +127,10 @@ namespace NHyperscan {
 
     TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);
 
+    TDatabase CompileLiteral(const TStringBuf& literal, unsigned int flags); // platform info from MakeCurrentPlatformInfo()
+
+    TDatabase CompileLiteral(const TStringBuf& literal, unsigned int flags, TCPUFeatures cpuFeatures); // platform info from MakePlatformInfo(cpuFeatures)
+
     TDatabase CompileMulti(
         const TVector<const char*>& regexs,
         const TVector<unsigned int>& flags,
@@ -131,6 +144,19 @@ namespace NHyperscan {
         TCPUFeatures cpuFeatures,
         const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
 
+    TDatabase CompileMultiLiteral( // platform info from MakeCurrentPlatformInfo(); all vectors must have equal sizes
+        const TVector<const char*>& literals,
+        const TVector<unsigned int>& flags,
+        const TVector<unsigned int>& ids,
+        const TVector<size_t>& lens); // lens[i] is the length of literals[i]
+
+    TDatabase CompileMultiLiteral( // platform info from MakePlatformInfo(cpuFeatures); all vectors must have equal sizes
+        const TVector<const char*>& literals,
+        const TVector<unsigned int>& flags,
+        const TVector<unsigned int>& ids,
+        const TVector<size_t>& lens, // lens[i] is the length of literals[i]
+        TCPUFeatures cpuFeatures);
+
     TScratch MakeScratch(const TDatabase& db);
 
     void GrowScratch(TScratch& scratch, const TDatabase& db);
diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
index 75cd0bcc897..063ca3dd035 100644
--- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
+++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
@@ -27,6 +27,22 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
         UNIT_ASSERT_EQUAL(foundId, 0);
     }
 
+    Y_UNIT_TEST(CompileLiteralAndScan) {
+        TDatabase db = CompileLiteral("a.c?[)", HS_FLAG_SINGLEMATCH); // pattern made of regex metacharacters, compiled as a literal
+        TScratch scratch = MakeScratch(db);
+
+        unsigned int foundId = 42; // sentinel; the assertion below expects the callback to replace it with 0
+        auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
+            foundId = id;
+        };
+        NHyperscan::Scan(
+            db,
+            scratch,
+            "a.c?[)", // scanned text is byte-for-byte the compiled literal
+            callback);
+        UNIT_ASSERT_EQUAL(foundId, 0);
+    }
+
     Y_UNIT_TEST(Matches) {
         NHyperscan::TDatabase db = NHyperscan::Compile(
             "a.c",
@@ -71,6 +87,49 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
         UNIT_ASSERT(foundIds.contains(241));
     }
 
+    Y_UNIT_TEST(MultiLiteral) {
+        static const TVector<TString> LITERALS = { // owns the bytes behind the c_str() pointers passed below
+             "foo.",
+             "bar.",
+        };
+        NHyperscan::TDatabase db = NHyperscan::CompileMultiLiteral(
+            {
+                LITERALS[0].c_str(),
+                LITERALS[1].c_str(),
+            },
+            {
+                HS_FLAG_SINGLEMATCH,
+                HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, // only "bar." is compiled caseless
+            },
+            {
+                42,
+                241,
+            },
+            {
+                LITERALS[0].size(),
+                LITERALS[1].size(),
+            });
+        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+
+        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "foo."));
+        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "bar."));
+        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "BAR."));
+        UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "FOO.")); // "foo." was compiled without HS_FLAG_CASELESS
+
+        TSet<unsigned int> foundIds;
+        auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
+            foundIds.insert(id);
+        };
+        NHyperscan::Scan(
+            db,
+            scratch,
+            "foo.BaR.", // one hit per literal: exact-case "foo." and mixed-case "BaR."
+            callback);
+        UNIT_ASSERT_EQUAL(foundIds.size(), 2);
+        UNIT_ASSERT(foundIds.contains(42));
+        UNIT_ASSERT(foundIds.contains(241));
+    }
+
     // https://ml.yandex-team.ru/thread/2370000002965712422/
     Y_UNIT_TEST(MultiRegression) {
         NHyperscan::CompileMulti(
