blob: be399843c087aa2e8e98571c4688ab9552326ebc (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
#pragma once
#include <base/defines.h>
#include <base/StringRef.h>
#include <Common/HashTable/StringHashMap.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <mutex>
#include <string>
#include <unordered_map>
namespace DB
{
class Arena;
/// Kind of line an entry of a custom TLD list was created from.
enum TLDType
{
    /// Marker meaning "no such entry".
    TLD_NONE = 0,
    /// Entry taken from a regular line.
    TLD_REGULAR = 1,
    /// Entry taken from a line with an asterisk (*).
    TLD_ANY = 2,
    /// Entry taken from a line with an exclamation mark (!).
    TLD_EXCLUDE = 3,
};
/// A custom (user-provided) list of top-level domains.
///
/// In contrast to tldLookup (which is built on gperf), the entries here
/// are kept in a plain StringHashMap.
class TLDList
{
public:
    /// Hash map from a host string to the TLDType recorded for it.
    using Container = StringHashMap<TLDType>;

    /// NOTE(review): `size` is presumably the expected number of entries,
    /// used to pre-size the container -- confirm against the definition.
    explicit TLDList(size_t size);

    /// Adds `host` to the list with the given `type`.
    void insert(const String & host, TLDType type);

    /// Returns the TLDType recorded for `host`.
    /// NOTE(review): presumably TLD_NONE when `host` is absent -- confirm against the definition.
    TLDType lookup(StringRef host) const;

    /// Number of entries currently held by the container.
    size_t size() const
    {
        return tld_container.size();
    }

private:
    Container tld_container;
    /// NOTE(review): presumably owns the memory backing the keys stored in
    /// tld_container -- confirm against the definition of insert().
    std::unique_ptr<Arena> memory_pool;
};
/// Holds named custom TLD lists and gives access to them by name.
class TLDListsHolder
{
public:
    /// List name -> parsed list.
    using Map = std::unordered_map<std::string, TLDList>;

    /// Returns a reference to the holder. The constructor is protected,
    /// so this is the way outside code obtains an instance.
    static TLDListsHolder & getInstance();

    /// Parses the "top_level_domains_lists" section of `config`
    /// and adds every dictionary found there.
    void parseConfig(const std::string & top_level_domains_path, const Poco::Util::AbstractConfiguration & config);

    /// Parses the file at `path` and adds it as a Set to the list of TLDs
    /// (presumably registered under `name` -- confirm against the definition).
    ///
    /// Lines that are skipped:
    /// - comments, which start with "//",
    /// - empty lines.
    ///
    /// Special symbols that are recognized:
    /// - "*"
    /// - "!"
    ///
    /// Format : https://github.com/publicsuffix/list/wiki/Format
    /// Example: https://publicsuffix.org/list/public_suffix_list.dat
    ///
    /// Returns the size of the list.
    size_t parseAndAddTldList(const std::string & name, const std::string & path);

    /// Returns the list called `name`.
    /// Throws TLD_LIST_NOT_FOUND if no such list exists.
    const TLDList & getTldList(const std::string & name);

protected:
    TLDListsHolder();

    /// tld_lists_map is annotated as guarded by this mutex.
    std::mutex tld_lists_map_mutex;
    Map tld_lists_map TSA_GUARDED_BY(tld_lists_map_mutex);
};
}
|