aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Storages/Cache/ExternalDataSourceCache.h
blob: 937801c4767154df0b6f4bc2ec7c6e0481ba3d08 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#pragma once
#include <condition_variable>
#include <filesystem>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <Core/BackgroundSchedulePool.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/ReadSettings.h>
#include <IO/SeekableReadBuffer.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileBase.h>
#include <Disks/IO/createReadBufferFromFileBase.h>
#include <Interpreters/Context.h>
#include <Storages/Cache/IRemoteFileMetadata.h>
#include <Storages/Cache/RemoteCacheController.h>
#include <Storages/Cache/RemoteFileCachePolicy.h>
#include <boost/core/noncopyable.hpp>
#include <Poco/Logger.h>
#include <Common/ErrorCodes.h>
#include <Common/LRUResourceCache.h>
#include <Common/ThreadPool.h>


namespace DB
{
// Cache type used throughout this header: an LRUResourceCache instantiated with String,
// RemoteCacheController, RemoteFileCacheWeightFunction and RemoteFileCacheReleaseFunction.
// NOTE(review): presumably String is the key and RemoteCacheController the cached resource,
// with the two functors supplying entry weight and release behaviour — confirm against
// Common/LRUResourceCache.h.
using RemoteFileCacheType = LRUResourceCache<String, RemoteCacheController, RemoteFileCacheWeightFunction, RemoteFileCacheReleaseFunction>;

// Groups the objects that belong to one entry obtained from RemoteFileCacheType:
// the holder of the cache controller, a file read buffer, and - when the three-argument
// constructor is used - the original read buffer plus a background schedule pool.
// NOTE(review): the constructor and destructor bodies are defined outside this header, so
// how these members are used or released cannot be confirmed from here.
class LocalFileHolder
{
public:
    // Builds a holder from a cache controller handle only.
    explicit LocalFileHolder(RemoteFileCacheType::MappedHolderPtr cache_controller);

    // Builds a holder that additionally takes ownership of `original_readbuffer_`
    // and receives a pointer to a background schedule pool.
    explicit LocalFileHolder(
        RemoteFileCacheType::MappedHolderPtr cache_controller,
        std::unique_ptr<ReadBuffer> original_readbuffer_,
        BackgroundSchedulePool * thread_pool_);

    ~LocalFileHolder();

    // Declaration order below is significant: it fixes member initialization and
    // destruction order, so do not reorder.
    RemoteFileCacheType::MappedHolderPtr file_cache_controller;
    std::unique_ptr<ReadBufferFromFileBase> file_buffer;
    std::unique_ptr<ReadBuffer> original_readbuffer;
    // Raw pointer (presumably non-owning - verify against the .cpp). Defaulted to nullptr
    // so it can never hold an indeterminate value when a constructor does not set it.
    BackgroundSchedulePool * thread_pool = nullptr;
};

// A SeekableReadBuffer with its own memory that also implements WithFileSize.
// The reported file size is the value stored in `remote_file_size`.
class RemoteReadBuffer : public BufferWithOwnMemory<SeekableReadBuffer>, public WithFileSize
{
public:
    // `buff_size` is the requested buffer size (how it is applied is defined in the .cpp).
    explicit RemoteReadBuffer(size_t buff_size);
    ~RemoteReadBuffer() override = default;

    // Factory entry point. Takes ownership of `read_buffer` and returns a ReadBuffer;
    // note the declared return type is the base ReadBuffer, not RemoteReadBuffer.
    // `is_random_accessed` defaults to false.
    static std::unique_ptr<ReadBuffer> create(
        ContextPtr context,
        IRemoteFileMetadataPtr remote_file_metadata,
        std::unique_ptr<ReadBuffer> read_buffer,
        size_t buff_size,
        bool is_random_accessed = false);

    // ReadBuffer / SeekableReadBuffer overrides.
    bool nextImpl() override;
    off_t seek(off_t off, int whence) override;
    off_t getPosition() override;

    // WithFileSize override: returns the stored remote file size.
    size_t getFileSize() override
    {
        return remote_file_size;
    }

private:
    std::unique_ptr<LocalFileHolder> local_file_holder;
    // Zero until assigned elsewhere (assignment is not visible in this header).
    size_t remote_file_size{0};
};


// Process-wide cache object for external (remote) data sources.
// Non-copyable; the constructor is protected and instances are obtained through instance().
// NOTE(review): the member function bodies other than the inline accessors live outside this
// header; the comments below describe only what the declarations themselves establish.
class ExternalDataSourceCache : private boost::noncopyable
{
public:
    ~ExternalDataSourceCache();
    // global instance
    static ExternalDataSourceCache & instance();

    // Configures the cache. Parameter names suggest a root directory, a size limit and a
    // flush threshold in bytes - TODO confirm exact semantics against the implementation.
    void initOnce(ContextPtr context, const String & root_dir_, size_t limit_size_, size_t bytes_read_before_flush_);

    // Reads the atomic `initialized` flag.
    bool isInitialized() const { return initialized; }

    // Returns a pair of (local file holder, read buffer). `read_buffer` is taken by
    // non-const reference, so the callee is able to move from or replace it.
    std::pair<std::unique_ptr<LocalFileHolder>, std::unique_ptr<ReadBuffer>>
    createReader(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr<ReadBuffer> & read_buffer, bool is_random_accessed);


    // Atomically adds `size` to the running total.
    void updateTotalSize(size_t size) { total_size += size; }

protected:
    ExternalDataSourceCache();

private:
    // Root directories of the local cache for the remote filesystem
    // (stored as a list, although initOnce receives a single `root_dir_`).
    Strings root_dirs;
    size_t local_cache_bytes_read_before_flush = 0;

    std::atomic<bool> initialized = false;
    // Explicitly zero-initialized: before C++20 a default-constructed std::atomic holds an
    // indeterminate value, and updateTotalSize() accumulates into this counter.
    std::atomic<size_t> total_size = 0;
    std::mutex mutex;
    std::unique_ptr<RemoteFileCacheType> lru_caches;

    Poco::Logger * log = &Poco::Logger::get("ExternalDataSourceCache");

    String calculateLocalPath(IRemoteFileMetadataPtr meta) const;

    BackgroundSchedulePool::TaskHolder recover_task_holder;
    void recoverTask();
};
}