aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/core/yql_udf_index.h
blob: 1ac882bd81eba9de8bebbcd3931576cacd3027d2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#pragma once

#include "yql_udf_resolver.h"

#include <util/generic/hash.h>
#include <util/generic/map.h>
#include <util/generic/maybe.h>
#include <util/generic/set.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>

#include <functional>

namespace NYql {

// Plain description of a single UDF function as stored in the index.
// All numeric/boolean fields default to zero/false.
struct TFunctionInfo {
    // Function name; TResourceInfo::SetFunctions uses it as the lookup key.
    TString Name;
    // NOTE(review): presumably the total number of arguments and how many of
    // them are optional - confirm against the code that fills this struct.
    int ArgCount = 0;
    int OptionalArgCount = 0;
    bool IsTypeAwareness = false;
    // NOTE(review): these look like serialized type descriptions; the exact
    // format is not visible from this header.
    TString CallableType;
    TString RunConfigType;
    bool IsStrict = false;
    bool SupportsBlocks = false;
};

// todo: specify whether path is frozen
struct TDownloadLink {
    bool IsUrl = false;
    TString Path;
    TString Md5;

    TDownloadLink() {

    }

    TDownloadLink(bool isUrl, const TString& path, const TString& md5)
        : IsUrl(isUrl)
        , Path(path)
        , Md5(md5)
    {
    }

    TDownloadLink(const TDownloadLink&) = default;
    TDownloadLink& operator=(const TDownloadLink&) = default;

    static TDownloadLink Url(const TString& path, const TString& md5 = "") {
        return { true, path, md5 };
    }

    static TDownloadLink File(const TString& path, const TString& md5 = "") {
        return { false, path, md5 };
    }

    bool operator==(const TDownloadLink& other) const {
        return std::tie(IsUrl, Path, Md5) == std::tie(other.IsUrl, other.Path, Md5);
    }

    bool operator!=(const TDownloadLink& other) const {
        return !(*this == other);
    }

    size_t Hash() const {
        return CombineHashes(
            CombineHashes((size_t)IsUrl, ComputeHash(Path)),
            ComputeHash(Md5)
        );
    }
};

struct TResourceInfo : public TThrRefBase {
    typedef TIntrusiveConstPtr<TResourceInfo> TPtr;

    bool IsTrusted = false;
    TDownloadLink Link;
    TSet<TString> Modules;
    TMap<TString, TFunctionInfo> Functions;
    TMap<TString, TSet<TString>> ICaseFuncNames;

    void SetFunctions(const TVector<TFunctionInfo>& functions) {
        for (auto& f : functions) {
            Functions.emplace(f.Name, f);
            ICaseFuncNames[to_lower(f.Name)].insert(f.Name);
        }
    }
};

// Orders resource pointers by the address of the object they refer to, so
// that TResourceInfo::TPtr can be stored in ordered containers such as
// TSet<TResourceInfo::TPtr>.
inline bool operator<(const TResourceInfo::TPtr& p1, const TResourceInfo::TPtr& p2) {
    // The built-in '<' yields an unspecified result for pointers to unrelated
    // objects; std::less is guaranteed to provide a strict total order.
    return std::less<const TResourceInfo*>()(p1.Get(), p2.Get());
}

class TUdfIndex : public TThrRefBase {
public:
    typedef TIntrusivePtr<TUdfIndex> TPtr;

public:
    // todo: trusted resources should not be replaceble regardless of specified mode
    enum class EOverrideMode {
        PreserveExisting,
        ReplaceWithNew,
        RaiseError
    };

    enum class EStatus {
        Found,
        NotFound,
        Ambigious
    };

public:
    TUdfIndex();
    void SetCaseSentiveSearch(bool caseSensitive);
    bool CanonizeModule(TString& moduleName) const;
    EStatus ContainsModule(const TString& moduleName) const;
    EStatus FindFunction(const TString& moduleName, const TString& functionName, TFunctionInfo& function) const;
    TResourceInfo::TPtr FindResourceByModule(const TString& moduleName) const;

    bool ContainsModuleStrict(const TString& moduleName) const;
    /*
    New resource can contain already registered module.
    In this case 'mode' will be used to resolve conflicts.
    For instance, if mode == ReplaceWithNew all functions from old resource will be removed and new functions will be registered.
    It is important to do it atomically because two .so cannot have intersecting module lists
    */
    void RegisterResource(const TResourceInfo::TPtr& resource, EOverrideMode mode);
    void RegisterResources(const TVector<TResourceInfo::TPtr>& resources, EOverrideMode mode);

    TIntrusivePtr<TUdfIndex> Clone() const;

private:
    explicit TUdfIndex(const TMap<TString, TResourceInfo::TPtr>& resources, bool caseSensitive);

    bool ContainsAnyModule(const TSet<TString>& modules) const;
    TSet<TResourceInfo::TPtr> FindResourcesByModules(const TSet<TString>& modules) const;
    void UnregisterResource(TResourceInfo::TPtr resource);

private:
    // module => Resource
    TMap<TString, TResourceInfo::TPtr> Resources_;
    bool CaseSensitive_ = true;
    TMap<TString, TSet<TString>> ICaseModules_;
};

// Overloads that populate `registry` using `resolver`; `mode` is the
// TUdfIndex override policy passed along for conflicting modules.
// The overloads differ only in how the inputs are supplied:
//   - a list of paths plus a trust flag,
//   - a map (per the parameter name, path => md5) plus a trust flag,
//   - a list of user data blocks,
//   - a single user data block.
// NOTE(review): the definitions are not in this header; the exact loading and
// error behaviour should be confirmed in the implementation file.
void LoadRichMetadataToUdfIndex(const IUdfResolver& resolver, const TVector<TString>& paths, bool isTrusted, TUdfIndex::EOverrideMode mode, TUdfIndex& registry);
void LoadRichMetadataToUdfIndex(const IUdfResolver& resolver, const TMap<TString, TString>& pathsWithMd5, bool isTrusted, TUdfIndex::EOverrideMode mode, TUdfIndex& registry);
void LoadRichMetadataToUdfIndex(const IUdfResolver& resolver, const TVector<TUserDataBlock>& blocks, TUdfIndex::EOverrideMode mode, TUdfIndex& registry);
void LoadRichMetadataToUdfIndex(const IUdfResolver& resolver, const TUserDataBlock& block, TUdfIndex::EOverrideMode mode, TUdfIndex& registry);

}