aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Interpreters/Lemmatizers.h
blob: 2b8fcbea0d26769e8ce832f16d02c4fe54731ad7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#pragma once

#include "clickhouse_config.h"

#if USE_NLP

#include <base/types.h>
#include <Poco/Util/Application.h>

#include <memory>
#include <mutex>
#include <unordered_map>


namespace DB
{

/// Abstract interface implemented by concrete lemmatizers.
/// Instances are meant to be used and destroyed through a pointer or reference to this base type.
class ILemmatizer
{
public:
    /// Virtual so that derived lemmatizers are destroyed correctly through a base pointer.
    virtual ~ILemmatizer() = default;

    /// Shared-ownership handle to the character array returned by lemmatize().
    using TokenPtr = std::shared_ptr<char[]>;

    /// Produces the result for the given token.
    /// @param token input token as a C string (presumably NUL-terminated; confirm against implementations).
    /// @return shared character array with the result; its exact contents are implementation-defined.
    virtual TokenPtr lemmatize(const char * token) = 0;
};


/// Registry that hands out ILemmatizer instances by name.
/// The member definitions live outside this header; comments here describe only what the declaration shows.
class Lemmatizers
{
public:
    /// Shared-ownership handle to a lemmatizer implementation.
    using LemmPtr = std::shared_ptr<ILemmatizer>;

    /// Builds the registry from the given configuration.
    /// NOTE(review): presumably fills `paths` from config entries; confirm in the implementation file.
    explicit Lemmatizers(const Poco::Util::AbstractConfiguration & config);

    /// Returns the lemmatizer registered under `name`.
    /// NOTE(review): behaviour for an unknown name is defined in the implementation file; not visible here.
    LemmPtr getLemmatizer(const String & name);

private:
    /// NOTE(review): presumably guards the two maps below; verify against the implementation.
    std::mutex mutex;

    /// Lemmatizer instances keyed by name.
    std::unordered_map<String, LemmPtr> lemmatizers;

    /// String-to-string map keyed by name (by its name, likely dictionary file paths; confirm).
    std::unordered_map<String, String> paths;
};

}

#endif