aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/IO/SeekableReadBuffer.h
blob: 5770948be205a469a2c37859455f586d50c148f9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#pragma once

#include <IO/ReadBuffer.h>
#include <IO/WithFileSize.h>
#include <optional>

namespace DB
{

/// Only the error code used by the default method implementations in this header is declared here;
/// its definition lives outside this header.
namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}


/// A ReadBuffer whose current position can be moved (seek()) and queried (getPosition()).
///
/// Apart from that mandatory pair, the interface exposes several optional capabilities.
/// Each of them has a conservative default below and can be overridden by implementations.
class SeekableReadBuffer : public ReadBuffer
{
public:
    /// Both constructors only forward their arguments to the matching ReadBuffer constructor.
    SeekableReadBuffer(Position ptr, size_t size)
        : ReadBuffer(ptr, size)
    {
    }

    SeekableReadBuffer(Position ptr, size_t size, size_t offset)
        : ReadBuffer(ptr, size, offset)
    {
    }

    /// Moves the current position of the buffer to the given offset.
    ///
    /// @param off    Offset, interpreted according to `whence`.
    /// @param whence Seek mode (@see SEEK_SET, @see SEEK_CUR).
    /// @return The new position, counted from the beginning of the underlying buffer / file.
    ///
    /// Seeking past the end of the file is implementation-defined.
    ///
    /// Note for implementers: a seekable buffer may encounter eof() once, after which the working
    /// buffer may be in an inconsistent state. Remember to reset it on the first nextImpl()
    /// that follows a seek().
    virtual off_t seek(off_t off, int whence) = 0;

    /// @return The offset, from the beginning of the underlying buffer / file, that corresponds
    ///         to the current position of the buffer.
    virtual off_t getPosition() = 0;

    /// Text describing this buffer for log output (as the name suggests).
    /// The default implementation returns an empty string.
    virtual String getInfoForLog()
    {
        return "";
    }

    /// The default implementation always throws NOT_IMPLEMENTED.
    /// NOTE(review): presumably overrides return the file offset that corresponds to the end of
    /// the working buffer - confirm against the implementations.
    virtual size_t getFileOffsetOfBufferEnd() const
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFileOffsetOfBufferEnd() not implemented");
    }

    /// When this returns true, setReadUntilPosition() guarantees that eof is reported
    /// at the position it was given.
    virtual bool supportsRightBoundedReads() const
    {
        return false;
    }

    /// Tells whether seek() really works (true), or whether it will always throw or make the
    /// following nextImpl() calls throw (false).
    ///
    /// The check exists for two reasons:
    ///  * Finding out in advance whether a buffer is truly seekable is not always cheap.
    ///    For instance, the HTTP read buffer has to send a request to learn whether the server
    ///    supports byte ranges.
    ///  * At the moment such a buffer is created it may be unknown whether it will ever have to
    ///    be seekable, so paying for the check up front is undesirable.
    virtual bool checkIfActuallySeekable()
    {
        return true;
    }

    /// Positional read that bypasses the buffer.
    /// The buffer state (position, working_buffer, etc) is left untouched.
    ///
    /// `progress_callback` may be invoked periodically while reading, to report that to[0..m-1]
    /// has been filled. When it returns true, reading stops and readBigAt() returns the number of
    /// bytes read so far. It is called only from inside readBigAt(), on the same thread, and with
    /// increasing m.
    ///
    /// Reading ends after n bytes, at end of file, or on exception, whichever comes first.
    /// Returns the number of bytes read.
    /// For an offset past the end of file the call may either return 0 or throw.
    ///
    /// What the caller has to watch out for:
    ///  * supportsReadAt() must have been called and must have returned true before readBigAt()
    ///    is used. Otherwise readBigAt() may crash.
    ///  * Thread safety: several readBigAt() calls may run in parallel with each other,
    ///    but readBigAt() may not run in parallel with any other method
    ///    (e.g. next() or supportsReadAt()).
    ///  * Performance: nothing is buffered. A readBigAt() call typically turns into an actual
    ///    IO operation (e.g. an HTTP request), so it should not be used for small adjacent reads.
    virtual size_t readBigAt(char * /*to*/, size_t /*n*/, size_t /*offset*/, const std::function<bool(size_t m)> & /*progress_callback*/ = nullptr)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method readBigAt() not implemented");
    }

    /// Tells whether readBigAt() may be used.
    /// May be slow and may throw (e.g. it may perform an HTTP request or an fstat).
    virtual bool supportsReadAt()
    {
        return false;
    }

    /// Some tricks are used to avoid the cost of a seek, e.g. reading more data and then ignoring
    /// it (see remote_read_min_bytes_for_seek).
    /// Sometimes, however, a seek is basically free because the underlying read buffer has not
    /// been initialised yet (or has not been re-initialised after a reset).
    virtual bool seekIsCheap()
    {
        return false;
    }
};


/// Shared-ownership pointer to a SeekableReadBuffer.
using SeekableReadBufferPtr = std::shared_ptr<SeekableReadBuffer>;

/// Wraps a reference to a SeekableReadBuffer into a unique pointer to SeekableReadBuffer.
/// This function is like wrapReadBufferReference() but for SeekableReadBuffer.
/// NOTE(review): presumably the returned wrapper does not own `ref`, so `ref` has to outlive it -
/// confirm against the definition.
std::unique_ptr<SeekableReadBuffer> wrapSeekableReadBufferReference(SeekableReadBuffer & ref);
/// Same kind of wrapper, but the buffer is passed as a SeekableReadBufferPtr (shared pointer)
/// instead of a reference.
/// NOTE(review): presumably the wrapper holds on to `ptr` for its own lifetime - confirm.
std::unique_ptr<SeekableReadBuffer> wrapSeekableReadBufferPointer(SeekableReadBufferPtr ptr);

/// Helper for implementing readBigAt().
/// NOTE(review): the definition is not visible here. Presumably it reads up to `n` bytes from `istr`
/// into `to`, invokes `progress_callback` the way readBigAt() describes, returns the number of bytes
/// copied, and (when `out_cancelled` is not null) reports through it whether the callback asked to
/// stop - confirm against the definition.
size_t copyFromIStreamWithProgressCallback(std::istream & istr, char * to, size_t n, const std::function<bool(size_t)> & progress_callback, bool * out_cancelled = nullptr);

}