aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/restricted/aws/s2n/tls/s2n_ktls_io.c
blob: 12ee729d8702bac08ca52d683c52a7901e93119a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

#if defined(__FreeBSD__) || defined(__APPLE__)
    /* https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_socket.h.html
     * The POSIX standard does not define the CMSG_LEN and CMSG_SPACE macros. FreeBSD
     * and APPLE check and disable these macros if the _POSIX_C_SOURCE flag is set.
     *
     * Since s2n-tls already unsets the _POSIX_C_SOURCE in other files and is not
     * POSIX compliant, we continue the pattern here.
     */
    #undef _POSIX_C_SOURCE
#endif
#include <sys/socket.h>

#ifdef S2N_LINUX_SENDFILE
    #include <sys/sendfile.h>
#endif

#include "error/s2n_errno.h"
#include "tls/s2n_ktls.h"
#include "tls/s2n_tls.h"
#include "utils/s2n_io.h"
#include "utils/s2n_result.h"
#include "utils/s2n_safety.h"
#include "utils/s2n_socket.h"

/* record_type is of type uint8_t */
#define S2N_KTLS_RECORD_TYPE_SIZE    (sizeof(uint8_t))
#define S2N_KTLS_CONTROL_BUFFER_SIZE (CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE))

#define S2N_MAX_STACK_IOVECS     16
#define S2N_MAX_STACK_IOVECS_MEM (S2N_MAX_STACK_IOVECS * sizeof(struct iovec))

/* Used to override sendmsg and recvmsg for testing. */
static ssize_t s2n_ktls_default_sendmsg(void *io_context, const struct msghdr *msg);
static ssize_t s2n_ktls_default_recvmsg(void *io_context, struct msghdr *msg);
s2n_ktls_sendmsg_fn s2n_sendmsg_fn = s2n_ktls_default_sendmsg;
s2n_ktls_recvmsg_fn s2n_recvmsg_fn = s2n_ktls_default_recvmsg;

S2N_RESULT s2n_ktls_set_sendmsg_cb(struct s2n_connection *conn, s2n_ktls_sendmsg_fn send_cb,
        void *send_ctx)
{
    RESULT_ENSURE_REF(conn);
    RESULT_ENSURE_REF(send_ctx);
    RESULT_ENSURE(s2n_in_test(), S2N_ERR_NOT_IN_TEST);
    conn->send_io_context = send_ctx;
    s2n_sendmsg_fn = send_cb;
    return S2N_RESULT_OK;
}

S2N_RESULT s2n_ktls_set_recvmsg_cb(struct s2n_connection *conn, s2n_ktls_recvmsg_fn recv_cb,
        void *recv_ctx)
{
    RESULT_ENSURE_REF(conn);
    RESULT_ENSURE_REF(recv_ctx);
    RESULT_ENSURE(s2n_in_test(), S2N_ERR_NOT_IN_TEST);
    conn->recv_io_context = recv_ctx;
    s2n_recvmsg_fn = recv_cb;
    return S2N_RESULT_OK;
}

static ssize_t s2n_ktls_default_recvmsg(void *io_context, struct msghdr *msg)
{
    POSIX_ENSURE_REF(io_context);
    POSIX_ENSURE_REF(msg);

    const struct s2n_socket_read_io_context *peer_socket_ctx = io_context;
    POSIX_ENSURE_REF(peer_socket_ctx);
    int fd = peer_socket_ctx->fd;

    return recvmsg(fd, msg, 0);
}

static ssize_t s2n_ktls_default_sendmsg(void *io_context, const struct msghdr *msg)
{
    POSIX_ENSURE_REF(io_context);
    POSIX_ENSURE_REF(msg);

    const struct s2n_socket_write_io_context *peer_socket_ctx = io_context;
    POSIX_ENSURE_REF(peer_socket_ctx);
    int fd = peer_socket_ctx->fd;

    return sendmsg(fd, msg, 0);
}

S2N_RESULT s2n_ktls_set_control_data(struct msghdr *msg, char *buf, size_t buf_size,
        int cmsg_type, uint8_t record_type)
{
    RESULT_ENSURE_REF(msg);
    RESULT_ENSURE_REF(buf);

    /*
     * https://man7.org/linux/man-pages/man3/cmsg.3.html
     * To create ancillary data, first initialize the msg_controllen
     * member of the msghdr with the length of the control message
     * buffer.
     */
    msg->msg_control = buf;
    msg->msg_controllen = buf_size;

    /*
     * https://man7.org/linux/man-pages/man3/cmsg.3.html
     * Use CMSG_FIRSTHDR() on the msghdr to get the first
     * control message and CMSG_NXTHDR() to get all subsequent ones.
     */
    struct cmsghdr *hdr = CMSG_FIRSTHDR(msg);
    RESULT_ENSURE_REF(hdr);

    /*
     * https://man7.org/linux/man-pages/man3/cmsg.3.html
     * In each control message, initialize cmsg_len (with CMSG_LEN()), the
     * other cmsghdr header fields, and the data portion using
     * CMSG_DATA().
     */
    hdr->cmsg_len = CMSG_LEN(S2N_KTLS_RECORD_TYPE_SIZE);
    hdr->cmsg_level = S2N_SOL_TLS;
    hdr->cmsg_type = cmsg_type;
    *CMSG_DATA(hdr) = record_type;

    /*
     * https://man7.org/linux/man-pages/man3/cmsg.3.html
     * Finally, the msg_controllen field of the msghdr
     * should be set to the sum of the CMSG_SPACE() of the length of all
     * control messages in the buffer
     */
    RESULT_ENSURE_GTE(msg->msg_controllen, CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE));
    msg->msg_controllen = CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE);

    return S2N_RESULT_OK;
}

/* Expect to receive a single cmsghdr containing the TLS record_type.
 *
 * s2n-tls allocates enough space to receive a single cmsghdr. Since this is
 * used to get the record_type when receiving over kTLS (enabled via
 * `s2n_connection_ktls_enable_recv`), the application should not configure
 * the socket to receive additional control messages. In the event s2n-tls
 * can not retrieve the record_type, it is safer to drop the record.
 */
S2N_RESULT s2n_ktls_get_control_data(struct msghdr *msg, int cmsg_type, uint8_t *record_type)
{
    RESULT_ENSURE_REF(msg);
    RESULT_ENSURE_REF(record_type);

    /* https://man7.org/linux/man-pages/man3/recvmsg.3p.html
     * MSG_CTRUNC  Control data was truncated.
     */
    if (msg->msg_flags & MSG_CTRUNC) {
        RESULT_BAIL(S2N_ERR_KTLS_BAD_CMSG);
    }

    /*
     * https://man7.org/linux/man-pages/man3/cmsg.3.html
     * To create ancillary data, first initialize the msg_controllen
     * member of the msghdr with the length of the control message
     * buffer.
     */
    RESULT_ENSURE(msg->msg_control, S2N_ERR_SAFETY);
    RESULT_ENSURE(msg->msg_controllen >= CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE), S2N_ERR_SAFETY);

    /* https://man7.org/linux/man-pages/man3/cmsg.3.html
     * Use CMSG_FIRSTHDR() on the msghdr to get the first
     * control message and CMSG_NXTHDR() to get all subsequent ones.
     */
    struct cmsghdr *hdr = CMSG_FIRSTHDR(msg);
    RESULT_ENSURE(hdr, S2N_ERR_KTLS_BAD_CMSG);

    /*
     * https://man7.org/linux/man-pages/man3/cmsg.3.html
     * In each control message, initialize cmsg_len (with CMSG_LEN()), the
     * other cmsghdr header fields, and the data portion using
     * CMSG_DATA().
     */
    RESULT_ENSURE(hdr->cmsg_level == S2N_SOL_TLS, S2N_ERR_KTLS_BAD_CMSG);
    RESULT_ENSURE(hdr->cmsg_type == cmsg_type, S2N_ERR_KTLS_BAD_CMSG);
    RESULT_ENSURE(hdr->cmsg_len == CMSG_LEN(S2N_KTLS_RECORD_TYPE_SIZE), S2N_ERR_KTLS_BAD_CMSG);
    *record_type = *CMSG_DATA(hdr);

    return S2N_RESULT_OK;
}

S2N_RESULT s2n_ktls_sendmsg(void *io_context, uint8_t record_type, const struct iovec *msg_iov,
        size_t msg_iovlen, s2n_blocked_status *blocked, size_t *bytes_written)
{
    RESULT_ENSURE_REF(bytes_written);
    RESULT_ENSURE_REF(blocked);
    RESULT_ENSURE(msg_iov != NULL || msg_iovlen == 0, S2N_ERR_NULL);

    *blocked = S2N_BLOCKED_ON_WRITE;
    *bytes_written = 0;

    struct msghdr msg = {
        /* msghdr requires a non-const iovec. This is safe because s2n-tls does
         * not modify msg_iov after this point.
         */
        .msg_iov = (struct iovec *) (uintptr_t) msg_iov,
        .msg_iovlen = msg_iovlen,
    };

    char control_data[S2N_KTLS_CONTROL_BUFFER_SIZE] = { 0 };
    RESULT_GUARD(s2n_ktls_set_control_data(&msg, control_data, sizeof(control_data),
            S2N_TLS_SET_RECORD_TYPE, record_type));

    ssize_t result = 0;
    S2N_IO_RETRY_EINTR(result, s2n_sendmsg_fn(io_context, &msg));
    RESULT_GUARD(s2n_io_check_write_result(result));

    *blocked = S2N_NOT_BLOCKED;
    *bytes_written = result;
    return S2N_RESULT_OK;
}

S2N_RESULT s2n_ktls_recvmsg(void *io_context, uint8_t *record_type, uint8_t *buf,
        size_t buf_len, s2n_blocked_status *blocked, size_t *bytes_read)
{
    RESULT_ENSURE_REF(record_type);
    RESULT_ENSURE_REF(bytes_read);
    RESULT_ENSURE_REF(blocked);
    RESULT_ENSURE_REF(buf);
    /* Ensure that buf_len is > 0 since trying to receive 0 bytes does not
     * make sense and a return value of `0` from recvmsg is treated as EOF.
     */
    RESULT_ENSURE_GT(buf_len, 0);

    *blocked = S2N_BLOCKED_ON_READ;
    *record_type = 0;
    *bytes_read = 0;
    struct iovec msg_iov = {
        .iov_base = buf,
        .iov_len = buf_len
    };
    struct msghdr msg = {
        .msg_iov = &msg_iov,
        .msg_iovlen = 1,
    };

    /*
     * https://man7.org/linux/man-pages/man3/cmsg.3.html
     * To create ancillary data, first initialize the msg_controllen
     * member of the msghdr with the length of the control message
     * buffer.
     */
    char control_data[S2N_KTLS_CONTROL_BUFFER_SIZE] = { 0 };
    msg.msg_controllen = sizeof(control_data);
    msg.msg_control = control_data;

    ssize_t result = 0;
    S2N_IO_RETRY_EINTR(result, s2n_recvmsg_fn(io_context, &msg));
    RESULT_GUARD(s2n_io_check_read_result(result));

    RESULT_GUARD(s2n_ktls_get_control_data(&msg, S2N_TLS_GET_RECORD_TYPE, record_type));

    *blocked = S2N_NOT_BLOCKED;
    *bytes_read = result;
    return S2N_RESULT_OK;
}

/* The iovec array `bufs` is constant and owned by the application.
 *
 * However, we need to apply the given offset to `bufs`. That may involve
 * updating the iov_base and iov_len of entries in `bufs` to reflect the bytes
 * already sent. Because `bufs` is constant, we need to instead copy `bufs` and
 * modify the copy.
 *
 * Since one of the primary benefits of kTLS is that we avoid buffering application
 * data and can pass application data as-is to the kernel, we try to limit the
 * situations where we need to copy `bufs` and use stack memory where possible.
 *
 * Note: We are copying an array of iovecs here, NOT the scattered application
 * data the iovecs reference. On Linux, the maximum data copied would be
 * 1024 (IOV_MAX on Linux) * 16 (sizeof(struct iovec)) = ~16KB.
 *
 * To avoid any copies when using a large number of iovecs, applications should
 * call s2n_sendv instead of s2n_sendv_with_offset.
 */
static S2N_RESULT s2n_ktls_update_bufs_with_offset(const struct iovec **bufs, size_t *count,
        size_t offs, struct s2n_blob *mem)
{
    RESULT_ENSURE_REF(bufs);
    RESULT_ENSURE_REF(count);
    RESULT_ENSURE(*bufs != NULL || *count == 0, S2N_ERR_NULL);
    RESULT_ENSURE_REF(mem);

    size_t skipped = 0;
    while (offs > 0) {
        /* If we need to skip more iovecs than actually exist,
         * then the offset is too large and therefore invalid.
         */
        RESULT_ENSURE(skipped < *count, S2N_ERR_INVALID_ARGUMENT);

        size_t iov_len = (*bufs)[skipped].iov_len;

        /* This is the last iovec affected by the offset. */
        if (offs < iov_len) {
            break;
        }

        offs -= iov_len;
        skipped++;
    }

    *count = (*count) - skipped;
    if (*count == 0) {
        return S2N_RESULT_OK;
    }

    *bufs = &(*bufs)[skipped];
    if (offs == 0) {
        return S2N_RESULT_OK;
    }

    size_t size = (*count) * (sizeof(struct iovec));
    /* If possible, use the existing stack memory in `mem` for the copy.
     * Otherwise, we need to allocate sufficient new heap memory. */
    if (size > mem->size) {
        RESULT_GUARD_POSIX(s2n_alloc(mem, size));
    }

    struct iovec *new_bufs = (struct iovec *) (void *) mem->data;
    RESULT_CHECKED_MEMCPY(new_bufs, *bufs, size);
    new_bufs[0].iov_base = (uint8_t *) new_bufs[0].iov_base + offs;
    new_bufs[0].iov_len = new_bufs[0].iov_len - offs;
    *bufs = new_bufs;

    return S2N_RESULT_OK;
}

ssize_t s2n_ktls_sendv_with_offset(struct s2n_connection *conn, const struct iovec *bufs,
        ssize_t count_in, ssize_t offs_in, s2n_blocked_status *blocked)
{
    POSIX_ENSURE_REF(conn);
    POSIX_ENSURE(count_in >= 0, S2N_ERR_INVALID_ARGUMENT);
    size_t count = count_in;
    POSIX_ENSURE(offs_in >= 0, S2N_ERR_INVALID_ARGUMENT);
    size_t offs = offs_in;

    DEFER_CLEANUP(struct s2n_blob new_bufs = { 0 }, s2n_free_or_wipe);
    uint8_t new_bufs_mem[S2N_MAX_STACK_IOVECS_MEM] = { 0 };
    POSIX_GUARD(s2n_blob_init(&new_bufs, new_bufs_mem, sizeof(new_bufs_mem)));
    if (offs > 0) {
        POSIX_GUARD_RESULT(s2n_ktls_update_bufs_with_offset(&bufs, &count, offs, &new_bufs));
    }

    size_t bytes_written = 0;
    POSIX_GUARD_RESULT(s2n_ktls_sendmsg(conn->send_io_context, TLS_APPLICATION_DATA,
            bufs, count, blocked, &bytes_written));
    return bytes_written;
}

int s2n_ktls_send_cb(void *io_context, const uint8_t *buf, uint32_t len)
{
    POSIX_ENSURE_REF(io_context);
    POSIX_ENSURE_REF(buf);

    /* For now, all control records are assumed to be alerts.
     * We can set the record_type on the io_context in the future.
     */
    const uint8_t record_type = TLS_ALERT;

    const struct iovec iov = {
        .iov_base = (void *) (uintptr_t) buf,
        .iov_len = len,
    };
    s2n_blocked_status blocked = S2N_NOT_BLOCKED;
    size_t bytes_written = 0;

    POSIX_GUARD_RESULT(s2n_ktls_sendmsg(io_context, record_type, &iov, 1,
            &blocked, &bytes_written));

    POSIX_ENSURE_LTE(bytes_written, len);
    return bytes_written;
}

int s2n_ktls_record_writev(struct s2n_connection *conn, uint8_t content_type,
        const struct iovec *in, int in_count, size_t offs, size_t to_write)
{
    POSIX_ENSURE_REF(conn);
    POSIX_ENSURE(in_count > 0, S2N_ERR_INVALID_ARGUMENT);
    size_t count = in_count;
    POSIX_ENSURE_REF(in);

    /* Currently, ktls only supports sending alerts.
     * To also support handshake messages, we would need a way to track record_type.
     * We could add a field to the send io context.
     */
    POSIX_ENSURE(content_type == TLS_ALERT, S2N_ERR_UNIMPLEMENTED);

    /* When stuffers automatically resize, they allocate a potentially large
     * chunk of memory to avoid repeated resizes.
     * Since ktls only uses conn->out for control messages (alerts and eventually
     * handshake messages), we expect infrequent small writes with conn->out
     * freed in between. Since we're therefore more concerned with the size of
     * the allocation than the frequency, use a more accurate size for each write.
     */
    POSIX_GUARD(s2n_stuffer_resize_if_empty(&conn->out, to_write));

    POSIX_GUARD(s2n_stuffer_writev_bytes(&conn->out, in, count, offs, to_write));
    return to_write;
}

int s2n_sendfile(struct s2n_connection *conn, int in_fd, off_t offset, size_t count,
        size_t *bytes_written, s2n_blocked_status *blocked)
{
    POSIX_ENSURE_REF(blocked);
    *blocked = S2N_BLOCKED_ON_WRITE;
    POSIX_ENSURE_REF(bytes_written);
    *bytes_written = 0;
    POSIX_ENSURE_REF(conn);
    POSIX_ENSURE(conn->ktls_send_enabled, S2N_ERR_KTLS_UNSUPPORTED_CONN);

    int out_fd = 0;
    POSIX_GUARD_RESULT(s2n_ktls_get_file_descriptor(conn, S2N_KTLS_MODE_SEND, &out_fd));

#ifdef S2N_LINUX_SENDFILE
    /* https://man7.org/linux/man-pages/man2/sendfile.2.html */
    ssize_t result = 0;
    S2N_IO_RETRY_EINTR(result, sendfile(out_fd, in_fd, &offset, count));
    POSIX_GUARD_RESULT(s2n_io_check_write_result(result));
    *bytes_written = result;
#else
    POSIX_BAIL(S2N_ERR_UNIMPLEMENTED);
#endif

    *blocked = S2N_NOT_BLOCKED;
    return S2N_SUCCESS;
}

int s2n_ktls_read_full_record(struct s2n_connection *conn, uint8_t *record_type)
{
    POSIX_ENSURE_REF(conn);
    POSIX_ENSURE_REF(record_type);

    /* If any unread data remains in conn->in, it must be application data that
     * couldn't be returned due to the size of the application's provided buffer.
     */
    if (s2n_stuffer_data_available(&conn->in)) {
        *record_type = TLS_APPLICATION_DATA;
        return S2N_SUCCESS;
    }

    POSIX_GUARD(s2n_stuffer_resize_if_empty(&conn->in, S2N_DEFAULT_FRAGMENT_LENGTH));

    struct s2n_stuffer record_stuffer = conn->in;
    size_t len = s2n_stuffer_space_remaining(&record_stuffer);
    uint8_t *buf = s2n_stuffer_raw_write(&record_stuffer, len);
    POSIX_ENSURE_REF(buf);

    s2n_blocked_status blocked = S2N_NOT_BLOCKED;
    size_t bytes_read = 0;

    /* Since recvmsg is responsible for decrypting the record in ktls,
     * we apply blinding to the recvmsg call.
     */
    s2n_result result = s2n_ktls_recvmsg(conn->recv_io_context, record_type,
            buf, len, &blocked, &bytes_read);
    WITH_ERROR_BLINDING(conn, POSIX_GUARD_RESULT(result));

    POSIX_GUARD(s2n_stuffer_skip_write(&conn->in, bytes_read));
    return S2N_SUCCESS;
}