diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/string_utils/levenshtein_diff/levenshtein_diff_ut.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/string_utils/levenshtein_diff/levenshtein_diff_ut.cpp')
-rw-r--r-- | library/cpp/string_utils/levenshtein_diff/levenshtein_diff_ut.cpp | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/library/cpp/string_utils/levenshtein_diff/levenshtein_diff_ut.cpp b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff_ut.cpp new file mode 100644 index 0000000000..cf0f78637f --- /dev/null +++ b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff_ut.cpp @@ -0,0 +1,190 @@ +#include "levenshtein_diff.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/string.h> + +namespace { + + float unaryZeroWeigher(const char&) { + return 0.0f; + }; + + float unaryMaxWeigher(const char&) { + return 1.0f; + }; + + float binaryZeroWeigher(const char&, const char&) { + return 0.0f; + }; + + float binaryMaxWeigher(const char&, const char&) { + return 1.0f; + }; + +} + +Y_UNIT_TEST_SUITE(Levenstein) { + Y_UNIT_TEST(Distance) { + UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::Distance(TStringBuf("hello"), TStringBuf("hulloah")), 3); + UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::Distance(TStringBuf("yeoman"), TStringBuf("yo man")), 2); + } +} + +Y_UNIT_TEST_SUITE(WeightedLevenstein) { + Y_UNIT_TEST(EqualStrings) { + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString("12345"), TString("12345"), chain, &distance, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher); + UNIT_ASSERT_VALUES_EQUAL(distance, 0.0f); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); + } + + Y_UNIT_TEST(EmptyStrings) { + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString(""), TString(""), chain, &distance, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher); + UNIT_ASSERT_VALUES_EQUAL(distance, 0.0f); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 0); + } + + Y_UNIT_TEST(InsertsOnly) { + auto unaryWeigher = [](const char&) { + return 2.0f; + }; + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString(""), TString("12345"), chain, &distance, binaryZeroWeigher, unaryZeroWeigher, unaryWeigher); + UNIT_ASSERT_VALUES_EQUAL(distance, 10.0f); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); + } + + Y_UNIT_TEST(DeletionsOnly) { + auto unaryWeigher = [](const char&) { + return 3.0f; + }; + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString("54321"), TString(""), chain, &distance, binaryZeroWeigher, unaryWeigher, unaryZeroWeigher); + UNIT_ASSERT_VALUES_EQUAL(distance, 15.0f); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); + } + + Y_UNIT_TEST(SymmetryCheck) { + const TString str1 = "123x5"; + const TString str2 = "x2345"; + const float trgDistance = 2.0f; + const size_t trgChainLen = 5; + + NLevenshtein::TEditChain chainLeftRight; + float distanceLeftRight = 0.0f; + NLevenshtein::GetEditChain(str1, str2, chainLeftRight, &distanceLeftRight, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher); + UNIT_ASSERT_VALUES_EQUAL(distanceLeftRight, trgDistance); + UNIT_ASSERT_VALUES_EQUAL(chainLeftRight.size(), trgChainLen); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chainLeftRight[0]), static_cast<int>(NLevenshtein::EMT_REPLACE)); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chainLeftRight[1]), static_cast<int>(NLevenshtein::EMT_PRESERVE)); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chainLeftRight[2]), static_cast<int>(NLevenshtein::EMT_PRESERVE)); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chainLeftRight[3]), static_cast<int>(NLevenshtein::EMT_REPLACE)); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chainLeftRight[4]), static_cast<int>(NLevenshtein::EMT_PRESERVE)); + + NLevenshtein::TEditChain chainRightLeft; + float distanceRightLeft = 0.0f; + NLevenshtein::GetEditChain(str2, str1, chainRightLeft, &distanceRightLeft, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher); + UNIT_ASSERT_VALUES_EQUAL(distanceRightLeft, trgDistance); + UNIT_ASSERT_VALUES_EQUAL(chainRightLeft.size(), trgChainLen); + UNIT_ASSERT(chainRightLeft == chainLeftRight); + } + + Y_UNIT_TEST(PreferReplacements) { + auto binaryWeigher = [](const char&, const char&) { + return 0.0625f; + }; + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString("54321"), TString("43210"), chain, &distance, binaryWeigher, unaryMaxWeigher, unaryMaxWeigher); + UNIT_ASSERT_VALUES_EQUAL(distance, 0.3125f); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); + } + + Y_UNIT_TEST(PreferInsertDeletions) { + auto unaryWeigher = [](const char&) { + return 0.0625f; + }; + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString("54321"), TString("98765"), chain, &distance, binaryMaxWeigher, unaryWeigher, unaryWeigher); + UNIT_ASSERT_VALUES_EQUAL(distance, 0.5f); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 9); + } + + Y_UNIT_TEST(NoXDeletions) { + auto unaryWeigher = [](const char& c) { + return c == 'x' ? 100.0f : 1.0f; + }; + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString("543x1"), TString("5431"), chain, &distance, binaryMaxWeigher, unaryWeigher, unaryMaxWeigher); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[3]), static_cast<int>(NLevenshtein::EMT_REPLACE)); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[4]), static_cast<int>(NLevenshtein::EMT_DELETE)); + UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f); + } + + Y_UNIT_TEST(NoXInsertions) { + auto unaryWeigher = [](const char& c) { + return c == 'x' ? 100.0f : 1.0f; + }; + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString("5431"), TString("543x1"), chain, &distance, binaryMaxWeigher, unaryMaxWeigher, unaryWeigher); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[3]), static_cast<int>(NLevenshtein::EMT_REPLACE)); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[4]), static_cast<int>(NLevenshtein::EMT_INSERT)); + UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f); + } + + Y_UNIT_TEST(NoReplacementsOfX) { + auto binaryWeigher = [](const char& l, const char&) { + return l == 'x' ? 100.0f : 1.0f; + }; + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString("5432x"), TString("5432y"), chain, &distance, binaryWeigher, unaryMaxWeigher, unaryMaxWeigher); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 6); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[4]), static_cast<int>(NLevenshtein::EMT_DELETE)); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[5]), static_cast<int>(NLevenshtein::EMT_INSERT)); + UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f); + } + + Y_UNIT_TEST(NoReplacementsForX) { + auto binaryWeigher = [](const char&, const char& r) { + return r == 'x' ? 100.0f : 1.0f; + }; + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString("y4321"), TString("x4321"), chain, &distance, binaryWeigher, unaryMaxWeigher, unaryMaxWeigher); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 6); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[0]), static_cast<int>(NLevenshtein::EMT_DELETE)); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[1]), static_cast<int>(NLevenshtein::EMT_INSERT)); + UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f); + } + + Y_UNIT_TEST(SimilarOperationPriorities) { + auto replaceWeigher = [](const char&, const char&) { + return 0.5f; + }; + auto deleteWeigher = [](const char&) { + return 0.2f; + }; + auto insertWeigher = [](const char&) { + return 0.9f; + }; + NLevenshtein::TEditChain chain; + float distance = 0.0f; + NLevenshtein::GetEditChain(TString("y0"), TString("0x"), chain, &distance, replaceWeigher, deleteWeigher, insertWeigher); + UNIT_ASSERT_VALUES_EQUAL(chain.size(), 2); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[0]), static_cast<int>(NLevenshtein::EMT_REPLACE)); + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[1]), static_cast<int>(NLevenshtein::EMT_REPLACE)); + UNIT_ASSERT_VALUES_EQUAL(distance, 1.0f); + } +} |