// Source: contrib/clickhouse/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h
// (blob 8d8fb9ef0c60053887abfe5440964505e3c31f4e)
#pragma once

#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Formats/IRowOutputFormat.h>

#include <IO/WriteBuffer.h>
#include <IO/WriteBufferValidUTF8.h>

namespace DB
{

/// Adaptor that sits between an output format base class (`Base`) and a concrete format
/// and optionally routes everything the format writes through a WriteBufferValidUTF8.
///
/// Derived classes must write to `ostr` instead of `out`: `ostr` points either to the
/// validating buffer (when validation is enabled and needed) or directly to `out`.
///
/// Template parameters:
///   Base - the output format class to derive from; it is constructed as
///          `Base(header, out_, args...)`.
///   Args - types of additional constructor arguments forwarded to `Base`.
template <typename Base, typename... Args>
class OutputFormatWithUTF8ValidationAdaptorBase : public Base
{
public:
    /// @param validate_utf8  whether the caller asked for UTF-8 validation of the output.
    /// @param header         block header, passed to `Base`.
    /// @param out_           destination buffer, passed to `Base`.
    /// @param args           extra arguments handed over to the `Base` constructor.
    ///
    /// Validation is only switched on when it was requested AND at least one column type
    /// of the main port's header reports that its text form is not guaranteed to be valid UTF-8.
    OutputFormatWithUTF8ValidationAdaptorBase(bool validate_utf8, const Block & header, WriteBuffer & out_, Args... args)
        : Base(header, out_, std::forward<Args>(args)...)
    {
        bool values_can_contain_invalid_utf8 = false;
        for (const auto & type : this->getPort(IOutputFormat::PortKind::Main).getHeader().getDataTypes())
        {
            if (!type->textCanContainOnlyValidUTF8())
            {
                /// One such type is enough to decide; no need to inspect the rest.
                values_can_contain_invalid_utf8 = true;
                break;
            }
        }

        if (validate_utf8 && values_can_contain_invalid_utf8)
        {
            validating_ostr = std::make_unique<WriteBufferValidUTF8>(this->out);
            ostr = validating_ostr.get();
        }
        else
            ostr = &this->out;
    }

    /// Calls next() on the buffer the format writes to; when that buffer is the validating
    /// wrapper, also calls next() on the underlying `out` so both layers are advanced.
    void flush() override
    {
        ostr->next();

        if (validating_ostr)
            this->out.next();
    }

    /// Finalizes the validating buffer if one is in use. `out` itself is not touched here.
    void finalizeBuffers() override
    {
        if (validating_ostr)
            validating_ostr->finalize();
    }

    /// Prepares the adaptor for reuse after a reset of the formatter.
    ///
    /// The decision made in the constructor is preserved: a fresh validating buffer is
    /// created only if validation was active before. Previously a validating buffer was
    /// created unconditionally, which silently enabled validation (and the rewriting of
    /// invalid sequences) for formatters that were constructed without it.
    void resetFormatterImpl() override
    {
        if (validating_ostr)
        {
            validating_ostr = std::make_unique<WriteBufferValidUTF8>(this->out);
            ostr = validating_ostr.get();
        }
        else
            ostr = &this->out;
    }

protected:
    /// Point to validating_ostr or out from IOutputFormat, should be used in derived classes instead of out.
    WriteBuffer * ostr = nullptr;

private:
    /// Validates UTF-8 sequences, replaces bad sequences with replacement character.
    /// Null when validation is disabled or not needed for the header's column types.
    std::unique_ptr<WriteBuffer> validating_ostr;
};

/// Adaptor over IOutputFormat with no extra constructor arguments forwarded to the base.
using OutputFormatWithUTF8ValidationAdaptor = OutputFormatWithUTF8ValidationAdaptorBase<IOutputFormat>;
/// Adaptor over IRowOutputFormat with no extra constructor arguments forwarded to the base.
using RowOutputFormatWithUTF8ValidationAdaptor = OutputFormatWithUTF8ValidationAdaptorBase<IRowOutputFormat>;

}