aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Common/TLDListsHolder.h
blob: be399843c087aa2e8e98571c4688ab9552326ebc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#pragma once

#include <base/defines.h>
#include <base/StringRef.h>
#include <Common/HashTable/StringHashMap.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

namespace DB
{
class Arena;

/// Classification of an entry of a TLD list.
enum TLDType
{
    /// Marker for an entry that does not exist
    TLD_NONE = 0,
    /// Regular line
    TLD_REGULAR = 1,
    /// Line with an asterisk (*)
    TLD_ANY = 2,
    /// Line with an exclamation mark (!)
    TLD_EXCLUDE = 3,
};

/// A custom (user-provided) list of TLDs.
///
/// In contrast to tldLookup, which is built on gperf, the entries of this list
/// are kept in a plain StringHashMap that maps a host to its TLDType.
class TLDList
{
public:
    using Container = StringHashMap<TLDType>;

    /// NOTE(review): `size` is presumably a sizing hint for the list - confirm in the .cpp.
    explicit TLDList(size_t size);

    /// Number of entries held by the underlying container.
    size_t size() const
    {
        return tld_container.size();
    }

    /// Look `host` up in the list and return its type.
    /// NOTE(review): presumably TLD_NONE is returned for a missing host - confirm in the .cpp.
    TLDType lookup(StringRef host) const;

    /// Add `host` to the list with the given `type`.
    void insert(const String & host, TLDType type);

private:
    /// host -> type of the corresponding line
    Container tld_container;
    /// Arena owned by this list.
    /// NOTE(review): presumably backs the storage of the inserted keys - confirm in the .cpp.
    std::unique_ptr<Arena> memory_pool;
};

/// Keeps the custom TLD lists, addressed by name.
///
/// Instances are obtained through getInstance(); the constructor is protected.
/// The map of lists is annotated as guarded by tld_lists_map_mutex.
class TLDListsHolder
{
public:
    using Map = std::unordered_map<std::string, TLDList>;

    static TLDListsHolder & getInstance();

    /// Get a previously added list by its name.
    ///
    /// Throws TLD_LIST_NOT_FOUND if the list does not exist.
    const TLDList & getTldList(const std::string & name);

    /// Parse the file at `path` and add it, as a Set, to the list of TLDs under `name`.
    ///
    /// Lines are handled as follows:
    /// - "//" starts a comment,
    /// - empty lines are ignored,
    /// - "*" and "!" are treated as special symbols.
    ///
    /// Format : https://github.com/publicsuffix/list/wiki/Format
    /// Example: https://publicsuffix.org/list/public_suffix_list.dat
    ///
    /// Returns the size of the list.
    size_t parseAndAddTldList(const std::string & name, const std::string & path);

    /// Parse the "top_level_domains_lists" section of `config`
    /// and add every dictionary found there.
    void parseConfig(const std::string & top_level_domains_path, const Poco::Util::AbstractConfiguration & config);

protected:
    TLDListsHolder();

    /// Protects tld_lists_map.
    std::mutex tld_lists_map_mutex;
    /// name -> TLD list
    Map tld_lists_map TSA_GUARDED_BY(tld_lists_map_mutex);
};

}