diff options
author | vmordovin <vmordovin@yandex-team.com> | 2024-07-27 14:31:54 +0300 |
---|---|---|
committer | vmordovin <vmordovin@yandex-team.com> | 2024-07-27 14:41:30 +0300 |
commit | 72c283db0b8f30e61ac1e4643ad530f7c027dac0 (patch) | |
tree | 364be474ebdbeb7da9ede834b2aa8b470aa0af1f | |
parent | 271bf6a41232dc7ad1f5e13fd33871400f27bd98 (diff) | |
download | ydb-72c283db0b8f30e61ac1e4643ad530f7c027dac0.tar.gz |
Move code to library
Начал с того, что просто унес ```url.*``` из ```search/meta``` в ```search/meta/generic```. Закончилось тем, что еще и поотпиливал странные зависимости ```kernel/facts/credibility/database->search/meta``` и ```quality/functionality/facts/credibility/url_trie_map_lib->search/meta```: общие библиотеки не имеют права зависеть от ```search/meta/*```
c1e3dbc38eff7073949a371f5fc59714eeb9306e
-rw-r--r-- | library/cpp/string_utils/url/url.cpp | 34 | ||||
-rw-r--r-- | library/cpp/string_utils/url/url.h | 6 |
2 files changed, 39 insertions, 1 deletions
diff --git a/library/cpp/string_utils/url/url.cpp b/library/cpp/string_utils/url/url.cpp
index c850afa5a7..4db902cf65 100644
--- a/library/cpp/string_utils/url/url.cpp
+++ b/library/cpp/string_utils/url/url.cpp
@@ -71,6 +71,40 @@ namespace NUrl {
         return {host, path};
     }
 
+    // Returns true if no character before the first '/' of url (or before its end) is uppercase per isupper().
+    bool HasLowerHost(const TStringBuf &url) {
+        for (const char ch : url) {
+            if (ch == '/')
+                break;
+            if (isupper(static_cast<unsigned char>(ch))) // <cctype> calls are undefined for negative char values
+                return false;
+        }
+        return true;
+    }
+
+    // Applies CutHttpPrefix and then CutWWWPrefix to url and drops one trailing '/' from the result.
+    TStringBuf CutHttpWwwPrefixes(const TStringBuf &url) {
+        TStringBuf urlCut = CutWWWPrefix(CutHttpPrefix(url));
+        if (!urlCut.empty() && urlCut.back() == '/')
+            urlCut = urlCut.substr(0, urlCut.length() - 1);
+        return urlCut;
+    }
+
+    // Returns a copy of url lower-cased from offset shift (clamped to the length) up to the first '/' or NUL.
+    TString MakeLowerHost(const TStringBuf &url, size_t shift) {
+        TString urlFixed(url);
+        for (char *c = urlFixed.begin() + (shift < urlFixed.size() ? shift : urlFixed.size()); *c && (*c != '/'); ++c) {
+            *c = static_cast<char>(tolower(static_cast<unsigned char>(*c)));
+        }
+        return urlFixed;
+    }
+
+    // Returns CutHttpWwwPrefixes(url) as a TString; the copy is lower-cased only when HasLowerHost() is false.
+    TString MakeNormalized(const TStringBuf &url) {
+        const TStringBuf urlCut = CutHttpWwwPrefixes(url);
+        return HasLowerHost(urlCut) ? ToString(urlCut) : MakeLowerHost(urlCut);
+    }
+
 } // namespace NUrl
 
 size_t GetHttpPrefixSize(const char* url, bool ignorehttps) noexcept {
diff --git a/library/cpp/string_utils/url/url.h b/library/cpp/string_utils/url/url.h
index 68fa23d64c..9b73233078 100644
--- a/library/cpp/string_utils/url/url.h
+++ b/library/cpp/string_utils/url/url.h
@@ -21,6 +21,11 @@ namespace NUrl {
     Y_PURE_FUNCTION
     TSplitUrlToHostAndPathResult SplitUrlToHostAndPath(const TStringBuf url);
 
+    bool HasLowerHost(const TStringBuf &url);
+    TStringBuf CutHttpWwwPrefixes(const TStringBuf &url);
+    TString MakeLowerHost(const TStringBuf &url, size_t shift = 0);
+    TString MakeNormalized(const TStringBuf &url);
+
 } // namespace NUrl
 
 Y_PURE_FUNCTION
@@ -170,4 +175,3 @@ TStringBuf RemoveFinalSlash(TStringBuf str) noexcept;
 
 TStringBuf CutUrlPrefixes(TStringBuf url) noexcept;
 bool DoesUrlPathStartWithToken(TStringBuf url, const TStringBuf& token) noexcept;
-