1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "domain.h"
namespace DB
{
template<bool conform_rfc>
struct ExtractTopLevelDomain
{
static size_t getReserveLengthForElement() { return 5; }
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
{
std::string_view host;
if constexpr (conform_rfc)
host = getURLHostRFC(data, size);
else
host = getURLHost(data, size);
res_data = data;
res_size = 0;
if (!host.empty())
{
if (host[host.size() - 1] == '.')
host.remove_suffix(1);
const auto * host_end = host.data() + host.size();
Pos last_dot = find_last_symbols_or_null<'.'>(host.data(), host_end);
if (!last_dot)
return;
/// For IPv4 addresses select nothing.
///
/// NOTE: it is safe to access last_dot[1]
/// since getURLHost() will not return a host if there is symbol after dot.
if (isNumericASCII(last_dot[1]))
return;
res_data = last_dot + 1;
res_size = host_end - res_data;
}
}
};
/// Name tag carrying the function name "topLevelDomain".
struct NameTopLevelDomain
{
    static constexpr auto name = "topLevelDomain";
};

/// String-to-string function built on ExtractTopLevelDomain with conform_rfc = false.
using FunctionTopLevelDomain = FunctionStringToString<
    ExtractSubstringImpl<ExtractTopLevelDomain<false>>,
    NameTopLevelDomain>;
/// Name tag carrying the function name "topLevelDomainRFC".
struct NameTopLevelDomainRFC
{
    static constexpr auto name = "topLevelDomainRFC";
};

/// String-to-string function built on ExtractTopLevelDomain with conform_rfc = true.
using FunctionTopLevelDomainRFC = FunctionStringToString<
    ExtractSubstringImpl<ExtractTopLevelDomain<true>>,
    NameTopLevelDomainRFC>;
/// Registers topLevelDomain and topLevelDomainRFC with the function factory,
/// attaching the user-facing documentation for each.
REGISTER_FUNCTION(TopLevelDomain)
{
    /// NOTE: the raw-string description is kept at column 0 on purpose --
    /// any indentation inside it would become part of the documentation text.
    factory.registerFunction<FunctionTopLevelDomain>(FunctionDocumentation
    {
        .description=R"(
Extracts the top-level domain from a URL.
Returns an empty string if the argument cannot be parsed as a URL or does not contain a top-level domain.
)",
        .examples{{"topLevelDomain", "SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk')", ""}},
        .categories{"URL"}
    });

    factory.registerFunction<FunctionTopLevelDomainRFC>(FunctionDocumentation
    {
        .description=R"(Similar to topLevelDomain, but conforms to RFC 3986.)",
        .examples{},
        .categories{"URL"}
    });
}
}
|