1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
#pragma once
#include <string_view>
#include <unordered_map>
#include <base/types.h>
#include <Compression/ICompressionCodec.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <Common/MultiVersion.h>
namespace DB
{
/// Encryption algorithms known to the encrypted codec.
/// The enumerators double as indices into the per-method arrays of
/// CompressionCodecEncrypted::Configuration::Params, so their values must stay dense and start at zero.
enum EncryptionMethod
{
    /// AES-GCM-SIV with a 128-bit key.
    AES_128_GCM_SIV = 0,

    /// AES-GCM-SIV with a 256-bit key.
    AES_256_GCM_SIV,

    /// Not an algorithm: the number of algorithms above. Must stay the last enumerator.
    MAX_ENCRYPTION_METHOD
};
/// Converts the textual name of an encryption method into the corresponding EncryptionMethod value.
/// Throws an exception if the name does not denote a known method.
/// NOTE(review): the accepted spellings are defined by the implementation, which is not visible in this header;
/// presumably they match the enumerator names (e.g. "AES_128_GCM_SIV") - confirm.
EncryptionMethod toEncryptionMethod(const std::string & name);
/** This codec encrypts and decrypts blocks with AES in GCM-SIV mode
  * (RFC-8452). The concrete algorithm is one of the EncryptionMethod
  * values (AES_128_GCM_SIV or AES_256_GCM_SIV) and is chosen when the
  * codec is constructed.
  *
  * Although it is implemented as a compression codec, it doesn't
  * actually compress data. In fact, encrypted data will no longer be
  * compressible in any meaningful way. This means that if you want to
  * apply both compression and encryption to your columns, you need to
  * put this codec at the end of the chain, like
  * "column Int32 Codec(Delta, LZ4, AES_128_GCM_SIV)".
  *
  * Keys and nonces are held by the nested Configuration class, which
  * loads them from the server configuration. Several keys, identified
  * by numeric ids, can be stored per method; one of them is the
  * "current" key.
  *
  * NOTE(review): an earlier revision of this comment stated that the key
  * is obtained by executing a command from the configuration file, that
  * HKDF-SHA-256 is applied to derive a 128-bit key-generation key, and
  * that the nonce is universally fixed to 12 NUL bytes. Only the
  * "12 null bytes when the nonce is empty" default is still mentioned in
  * this header (see getCurrentKeyAndNonce); the rest cannot be confirmed
  * from the declarations here - verify against the implementation.
  *
  * Known weakness (as documented by the original authors): when the
  * nonce is fixed, encrypting the same data block twice with the same
  * key yields exactly the same ciphertext. This is accepted because the
  * ciphertext must be deterministic, as otherwise engines like
  * ReplicatedMergeTree cannot deduplicate data blocks.
  */
class CompressionCodecEncrypted final : public ICompressionCodec
{
public:
/** Creates a codec bound to the given encryption method.
  *
  * NOTE(review): the comment previously placed here described a static
  * key-setup method that had to be invoked once at server startup. No
  * such method is declared in this class; keys are exposed through
  * Configuration instead. Presumably the configuration has to be loaded
  * before the codec can process data - confirm against the
  * implementation.
  */
explicit CompressionCodecEncrypted(EncryptionMethod Method);
/** Utility class that holds the encryption configuration
  * (keys, id of the current key and nonce for every encryption method).
  */
class Configuration
{
public:
/// Returns the shared Configuration object. The class is meant to be used as a singleton
/// (note that the declarations here do not forbid creating other instances).
static Configuration & instance();
/// Tries to load the encryption settings located under config_prefix in config.
/// NOTE(review): presumably returns true on success and false on failure instead of throwing - confirm.
bool tryLoad(const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
/// Loads the encryption settings located under config_prefix in config.
/// Throws an exception if something went wrong.
void load(const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
/// Returns the current key, its id and the nonce for the given method through the output
/// parameters current_key_id, current_key and nonce.
/// All three values are fetched together to prevent the situation where the config changes
/// in between and the key and the nonce are read from different versions of it.
/// If the configured nonce is empty, 12 null bytes are returned as the nonce.
void getCurrentKeyAndNonce(EncryptionMethod method, UInt64 & current_key_id, String & current_key, String & nonce) const;
/// Returns the key stored under key_id for the given method.
/// It is needed for decryption, where the key is looked up by id; that is why the nonce is not returned here.
/// NOTE(review): behaviour for an unknown key_id is defined by the implementation - confirm.
String getKey(EncryptionMethod method, const UInt64 & key_id) const;
private:
/// struct Params consists of:
/// 1) a hash table mapping key ids to keys,
/// 2) the id of the current key used for encryption,
/// 3) the nonce used for encryption.
/// Each of these is an array with MAX_ENCRYPTION_METHOD elements, indexed by EncryptionMethod,
/// because every algorithm can be described in the config and used for different tables.
struct Params
{
/// keys_storage[method] maps key id -> key.
std::unordered_map<UInt64, String> keys_storage[MAX_ENCRYPTION_METHOD];
/// Id of the key currently used for encryption, per method; defaults to 0.
UInt64 current_key_id[MAX_ENCRYPTION_METHOD] = {0, 0};
/// Nonce per method; an empty string is the default-constructed state.
String nonce[MAX_ENCRYPTION_METHOD];
};
/// Reads the settings of a single encryption method from config (under config_prefix)
/// and stores them into new_params.
static void loadImpl(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, EncryptionMethod method, std::unique_ptr<Params>& new_params);
/// Versioned holder of the loaded parameters.
/// NOTE(review): presumably MultiVersion lets readers keep a consistent snapshot while the
/// configuration is reloaded - confirm against Common/MultiVersion.h.
MultiVersion<Params> params;
};
/// Overrides of the ICompressionCodec interface; the returned method byte and the data
/// mixed into the hash are defined by the implementation.
uint8_t getMethodByte() const override;
void updateHash(SipHash & hash) const override;
/// This codec does not reduce the size of data, so it is not reported as a compression codec.
bool isCompression() const override
{
return false;
}
/// It is not a generic compression codec either.
bool isGenericCompression() const override
{
return false;
}
/// Identifies this codec as an encryption codec.
bool isEncryption() const override
{
return true;
}
protected:
/// Size of the destination buffer required to encrypt uncompressed_size bytes.
/// NOTE(review): presumably the input size plus a fixed per-block overhead (header, authentication tag) - confirm.
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
/// Encrypts source_size bytes from source into dest with the chosen method and returns the number of bytes written.
/// Throws an exception if encryption is impossible or the size of the encrypted text is incorrect.
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
/// Decrypts source_size bytes from source into dest with the chosen method;
/// uncompressed_size is the expected size of the decrypted data.
/// Throws an exception if decryption is impossible or the size of the decrypted text is incorrect.
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
private:
/// Encryption method this codec instance works with.
EncryptionMethod encryption_method;
};
}
|