/* * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ #if defined(__FreeBSD__) || defined(__APPLE__) /* https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_socket.h.html * The POSIX standard does not define the CMSG_LEN and CMSG_SPACE macros. FreeBSD * and APPLE check and disable these macros if the _POSIX_C_SOURCE flag is set. * * Since s2n-tls already unsets the _POSIX_C_SOURCE in other files and is not * POSIX compliant, we continue the pattern here. */ #undef _POSIX_C_SOURCE #endif #include #ifdef S2N_LINUX_SENDFILE #include #endif #include "crypto/s2n_sequence.h" #include "error/s2n_errno.h" #include "tls/s2n_ktls.h" #include "tls/s2n_tls.h" #include "utils/s2n_io.h" #include "utils/s2n_result.h" #include "utils/s2n_safety.h" #include "utils/s2n_socket.h" /* record_type is of type uint8_t */ #define S2N_KTLS_RECORD_TYPE_SIZE (sizeof(uint8_t)) #define S2N_KTLS_CONTROL_BUFFER_SIZE (CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE)) #define S2N_MAX_STACK_IOVECS 16 #define S2N_MAX_STACK_IOVECS_MEM (S2N_MAX_STACK_IOVECS * sizeof(struct iovec)) /* Used to override sendmsg and recvmsg for testing. */ static ssize_t s2n_ktls_default_sendmsg(void *io_context, const struct msghdr *msg); static ssize_t s2n_ktls_default_recvmsg(void *io_context, struct msghdr *msg); s2n_ktls_sendmsg_fn s2n_sendmsg_fn = s2n_ktls_default_sendmsg; s2n_ktls_recvmsg_fn s2n_recvmsg_fn = s2n_ktls_default_recvmsg; S2N_RESULT s2n_ktls_set_sendmsg_cb(struct s2n_connection *conn, s2n_ktls_sendmsg_fn send_cb, void *send_ctx) { RESULT_ENSURE_REF(conn); RESULT_ENSURE_REF(send_ctx); RESULT_ENSURE(s2n_in_test(), S2N_ERR_NOT_IN_TEST); conn->send_io_context = send_ctx; s2n_sendmsg_fn = send_cb; return S2N_RESULT_OK; } S2N_RESULT s2n_ktls_set_recvmsg_cb(struct s2n_connection *conn, s2n_ktls_recvmsg_fn recv_cb, void *recv_ctx) { RESULT_ENSURE_REF(conn); RESULT_ENSURE_REF(recv_ctx); RESULT_ENSURE(s2n_in_test(), S2N_ERR_NOT_IN_TEST); conn->recv_io_context = recv_ctx; s2n_recvmsg_fn = recv_cb; return S2N_RESULT_OK; } static ssize_t s2n_ktls_default_recvmsg(void *io_context, struct msghdr *msg) { POSIX_ENSURE_REF(io_context); POSIX_ENSURE_REF(msg); const struct s2n_socket_read_io_context *peer_socket_ctx = io_context; POSIX_ENSURE_REF(peer_socket_ctx); int fd = peer_socket_ctx->fd; return recvmsg(fd, msg, 0); } static ssize_t s2n_ktls_default_sendmsg(void *io_context, const struct msghdr *msg) { POSIX_ENSURE_REF(io_context); POSIX_ENSURE_REF(msg); const struct s2n_socket_write_io_context *peer_socket_ctx = io_context; POSIX_ENSURE_REF(peer_socket_ctx); int fd = peer_socket_ctx->fd; return sendmsg(fd, msg, 0); } S2N_RESULT s2n_ktls_set_control_data(struct msghdr *msg, char *buf, size_t buf_size, int cmsg_type, uint8_t record_type) { RESULT_ENSURE_REF(msg); RESULT_ENSURE_REF(buf); /* * https://man7.org/linux/man-pages/man3/cmsg.3.html * To create ancillary data, first initialize the msg_controllen * member of the msghdr with the length of the control message * buffer. */ msg->msg_control = buf; msg->msg_controllen = buf_size; /* * https://man7.org/linux/man-pages/man3/cmsg.3.html * Use CMSG_FIRSTHDR() on the msghdr to get the first * control message and CMSG_NXTHDR() to get all subsequent ones. */ struct cmsghdr *hdr = CMSG_FIRSTHDR(msg); RESULT_ENSURE_REF(hdr); /* * https://man7.org/linux/man-pages/man3/cmsg.3.html * In each control message, initialize cmsg_len (with CMSG_LEN()), the * other cmsghdr header fields, and the data portion using * CMSG_DATA(). */ hdr->cmsg_len = CMSG_LEN(S2N_KTLS_RECORD_TYPE_SIZE); hdr->cmsg_level = S2N_SOL_TLS; hdr->cmsg_type = cmsg_type; *CMSG_DATA(hdr) = record_type; /* * https://man7.org/linux/man-pages/man3/cmsg.3.html * Finally, the msg_controllen field of the msghdr * should be set to the sum of the CMSG_SPACE() of the length of all * control messages in the buffer */ RESULT_ENSURE_GTE(msg->msg_controllen, CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE)); msg->msg_controllen = CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE); return S2N_RESULT_OK; } /* Expect to receive a single cmsghdr containing the TLS record_type. * * s2n-tls allocates enough space to receive a single cmsghdr. Since this is * used to get the record_type when receiving over kTLS (enabled via * `s2n_connection_ktls_enable_recv`), the application should not configure * the socket to receive additional control messages. In the event s2n-tls * can not retrieve the record_type, it is safer to drop the record. */ S2N_RESULT s2n_ktls_get_control_data(struct msghdr *msg, int cmsg_type, uint8_t *record_type) { RESULT_ENSURE_REF(msg); RESULT_ENSURE_REF(record_type); /* https://man7.org/linux/man-pages/man3/recvmsg.3p.html * MSG_CTRUNC Control data was truncated. */ if (msg->msg_flags & MSG_CTRUNC) { RESULT_BAIL(S2N_ERR_KTLS_BAD_CMSG); } /* * https://man7.org/linux/man-pages/man3/cmsg.3.html * To create ancillary data, first initialize the msg_controllen * member of the msghdr with the length of the control message * buffer. */ RESULT_ENSURE(msg->msg_control, S2N_ERR_SAFETY); RESULT_ENSURE(msg->msg_controllen >= CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE), S2N_ERR_SAFETY); /* https://man7.org/linux/man-pages/man3/cmsg.3.html * Use CMSG_FIRSTHDR() on the msghdr to get the first * control message and CMSG_NXTHDR() to get all subsequent ones. */ struct cmsghdr *hdr = CMSG_FIRSTHDR(msg); RESULT_ENSURE(hdr, S2N_ERR_KTLS_BAD_CMSG); /* * https://man7.org/linux/man-pages/man3/cmsg.3.html * In each control message, initialize cmsg_len (with CMSG_LEN()), the * other cmsghdr header fields, and the data portion using * CMSG_DATA(). */ RESULT_ENSURE(hdr->cmsg_level == S2N_SOL_TLS, S2N_ERR_KTLS_BAD_CMSG); RESULT_ENSURE(hdr->cmsg_type == cmsg_type, S2N_ERR_KTLS_BAD_CMSG); RESULT_ENSURE(hdr->cmsg_len == CMSG_LEN(S2N_KTLS_RECORD_TYPE_SIZE), S2N_ERR_KTLS_BAD_CMSG); *record_type = *CMSG_DATA(hdr); return S2N_RESULT_OK; } S2N_RESULT s2n_ktls_sendmsg(void *io_context, uint8_t record_type, const struct iovec *msg_iov, size_t msg_iovlen, s2n_blocked_status *blocked, size_t *bytes_written) { RESULT_ENSURE_REF(bytes_written); RESULT_ENSURE_REF(blocked); RESULT_ENSURE(msg_iov != NULL || msg_iovlen == 0, S2N_ERR_NULL); *blocked = S2N_BLOCKED_ON_WRITE; *bytes_written = 0; struct msghdr msg = { /* msghdr requires a non-const iovec. This is safe because s2n-tls does * not modify msg_iov after this point. */ .msg_iov = (struct iovec *) (uintptr_t) msg_iov, .msg_iovlen = msg_iovlen, }; char control_data[S2N_KTLS_CONTROL_BUFFER_SIZE] = { 0 }; RESULT_GUARD(s2n_ktls_set_control_data(&msg, control_data, sizeof(control_data), S2N_TLS_SET_RECORD_TYPE, record_type)); ssize_t result = 0; S2N_IO_RETRY_EINTR(result, s2n_sendmsg_fn(io_context, &msg)); RESULT_GUARD(s2n_io_check_write_result(result)); *blocked = S2N_NOT_BLOCKED; *bytes_written = result; return S2N_RESULT_OK; } S2N_RESULT s2n_ktls_recvmsg(void *io_context, uint8_t *record_type, uint8_t *buf, size_t buf_len, s2n_blocked_status *blocked, size_t *bytes_read) { RESULT_ENSURE_REF(record_type); RESULT_ENSURE_REF(bytes_read); RESULT_ENSURE_REF(blocked); RESULT_ENSURE_REF(buf); /* Ensure that buf_len is > 0 since trying to receive 0 bytes does not * make sense and a return value of `0` from recvmsg is treated as EOF. */ RESULT_ENSURE_GT(buf_len, 0); *blocked = S2N_BLOCKED_ON_READ; *record_type = 0; *bytes_read = 0; struct iovec msg_iov = { .iov_base = buf, .iov_len = buf_len }; struct msghdr msg = { .msg_iov = &msg_iov, .msg_iovlen = 1, }; /* * https://man7.org/linux/man-pages/man3/cmsg.3.html * To create ancillary data, first initialize the msg_controllen * member of the msghdr with the length of the control message * buffer. */ char control_data[S2N_KTLS_CONTROL_BUFFER_SIZE] = { 0 }; msg.msg_controllen = sizeof(control_data); msg.msg_control = control_data; ssize_t result = 0; S2N_IO_RETRY_EINTR(result, s2n_recvmsg_fn(io_context, &msg)); RESULT_GUARD(s2n_io_check_read_result(result)); RESULT_GUARD(s2n_ktls_get_control_data(&msg, S2N_TLS_GET_RECORD_TYPE, record_type)); *blocked = S2N_NOT_BLOCKED; *bytes_read = result; return S2N_RESULT_OK; } /* The RFC defines the encryption limits in terms of "full-size records" sent. * We can estimate the number of "full-sized records" sent by assuming that * all records are full-sized. */ static S2N_RESULT s2n_ktls_estimate_records(size_t bytes, uint64_t *estimate) { RESULT_ENSURE_REF(estimate); uint64_t records = bytes / S2N_TLS_MAXIMUM_FRAGMENT_LENGTH; if (bytes % S2N_TLS_MAXIMUM_FRAGMENT_LENGTH) { records++; } *estimate = records; return S2N_RESULT_OK; } /* ktls does not currently support updating keys, so we should kill the connection * when the key encryption limit is reached. We could get the current record * sequence number from the kernel with getsockopt, but that requires a surprisingly * expensive syscall. * * Instead, we track the estimated sequence number and enforce the limit based * on that estimate. */ static S2N_RESULT s2n_ktls_check_estimated_record_limit( struct s2n_connection *conn, size_t bytes_requested) { RESULT_ENSURE_REF(conn); if (conn->actual_protocol_version < S2N_TLS13) { return S2N_RESULT_OK; } uint64_t new_records_sent = 0; RESULT_GUARD(s2n_ktls_estimate_records(bytes_requested, &new_records_sent)); uint64_t old_records_sent = 0; struct s2n_blob seq_num = { 0 }; RESULT_GUARD(s2n_connection_get_sequence_number(conn, conn->mode, &seq_num)); RESULT_GUARD_POSIX(s2n_sequence_number_to_uint64(&seq_num, &old_records_sent)); RESULT_ENSURE(S2N_ADD_IS_OVERFLOW_SAFE(old_records_sent, new_records_sent, UINT64_MAX), S2N_ERR_KTLS_KEY_LIMIT); uint64_t total_records_sent = old_records_sent + new_records_sent; RESULT_ENSURE_REF(conn->secure); RESULT_ENSURE_REF(conn->secure->cipher_suite); RESULT_ENSURE_REF(conn->secure->cipher_suite->record_alg); uint64_t encryption_limit = conn->secure->cipher_suite->record_alg->encryption_limit; RESULT_ENSURE(total_records_sent <= encryption_limit, S2N_ERR_KTLS_KEY_LIMIT); return S2N_RESULT_OK; } static S2N_RESULT s2n_ktls_set_estimated_sequence_number( struct s2n_connection *conn, size_t bytes_written) { RESULT_ENSURE_REF(conn); if (conn->actual_protocol_version < S2N_TLS13) { return S2N_RESULT_OK; } uint64_t new_records_sent = 0; RESULT_GUARD(s2n_ktls_estimate_records(bytes_written, &new_records_sent)); struct s2n_blob seq_num = { 0 }; RESULT_GUARD(s2n_connection_get_sequence_number(conn, conn->mode, &seq_num)); for (size_t i = 0; i < new_records_sent; i++) { RESULT_GUARD_POSIX(s2n_increment_sequence_number(&seq_num)); } return S2N_RESULT_OK; } /* The iovec array `bufs` is constant and owned by the application. * * However, we need to apply the given offset to `bufs`. That may involve * updating the iov_base and iov_len of entries in `bufs` to reflect the bytes * already sent. Because `bufs` is constant, we need to instead copy `bufs` and * modify the copy. * * Since one of the primary benefits of kTLS is that we avoid buffering application * data and can pass application data as-is to the kernel, we try to limit the * situations where we need to copy `bufs` and use stack memory where possible. * * Note: We are copying an array of iovecs here, NOT the scattered application * data the iovecs reference. On Linux, the maximum data copied would be * 1024 (IOV_MAX on Linux) * 16 (sizeof(struct iovec)) = ~16KB. * * To avoid any copies when using a large number of iovecs, applications should * call s2n_sendv instead of s2n_sendv_with_offset. */ static S2N_RESULT s2n_ktls_update_bufs_with_offset(const struct iovec **bufs, size_t *count, size_t offs, struct s2n_blob *mem) { RESULT_ENSURE_REF(bufs); RESULT_ENSURE_REF(count); RESULT_ENSURE(*bufs != NULL || *count == 0, S2N_ERR_NULL); RESULT_ENSURE_REF(mem); size_t skipped = 0; while (offs > 0) { /* If we need to skip more iovecs than actually exist, * then the offset is too large and therefore invalid. */ RESULT_ENSURE(skipped < *count, S2N_ERR_INVALID_ARGUMENT); size_t iov_len = (*bufs)[skipped].iov_len; /* This is the last iovec affected by the offset. */ if (offs < iov_len) { break; } offs -= iov_len; skipped++; } *count = (*count) - skipped; if (*count == 0) { return S2N_RESULT_OK; } *bufs = &(*bufs)[skipped]; if (offs == 0) { return S2N_RESULT_OK; } size_t size = (*count) * (sizeof(struct iovec)); /* If possible, use the existing stack memory in `mem` for the copy. * Otherwise, we need to allocate sufficient new heap memory. */ if (size > mem->size) { RESULT_GUARD_POSIX(s2n_alloc(mem, size)); } struct iovec *new_bufs = (struct iovec *) (void *) mem->data; RESULT_CHECKED_MEMCPY(new_bufs, *bufs, size); new_bufs[0].iov_base = (uint8_t *) new_bufs[0].iov_base + offs; new_bufs[0].iov_len = new_bufs[0].iov_len - offs; *bufs = new_bufs; return S2N_RESULT_OK; } ssize_t s2n_ktls_sendv_with_offset(struct s2n_connection *conn, const struct iovec *bufs, ssize_t count_in, ssize_t offs_in, s2n_blocked_status *blocked) { POSIX_ENSURE_REF(conn); POSIX_ENSURE(count_in >= 0, S2N_ERR_INVALID_ARGUMENT); size_t count = count_in; POSIX_ENSURE(offs_in >= 0, S2N_ERR_INVALID_ARGUMENT); size_t offs = offs_in; ssize_t total_bytes = 0; POSIX_GUARD_RESULT(s2n_sendv_with_offset_total_size(bufs, count_in, offs_in, &total_bytes)); POSIX_GUARD_RESULT(s2n_ktls_check_estimated_record_limit(conn, total_bytes)); /* The order of new_bufs and new_bufs_mem matters. See https://github.com/aws/s2n-tls/issues/4354 */ uint8_t new_bufs_mem[S2N_MAX_STACK_IOVECS_MEM] = { 0 }; DEFER_CLEANUP(struct s2n_blob new_bufs = { 0 }, s2n_free_or_wipe); POSIX_GUARD(s2n_blob_init(&new_bufs, new_bufs_mem, sizeof(new_bufs_mem))); if (offs > 0) { POSIX_GUARD_RESULT(s2n_ktls_update_bufs_with_offset(&bufs, &count, offs, &new_bufs)); } size_t bytes_written = 0; POSIX_GUARD_RESULT(s2n_ktls_sendmsg(conn->send_io_context, TLS_APPLICATION_DATA, bufs, count, blocked, &bytes_written)); POSIX_GUARD_RESULT(s2n_ktls_set_estimated_sequence_number(conn, bytes_written)); return bytes_written; } int s2n_ktls_send_cb(void *io_context, const uint8_t *buf, uint32_t len) { POSIX_ENSURE_REF(io_context); POSIX_ENSURE_REF(buf); /* For now, all control records are assumed to be alerts. * We can set the record_type on the io_context in the future. */ const uint8_t record_type = TLS_ALERT; const struct iovec iov = { .iov_base = (void *) (uintptr_t) buf, .iov_len = len, }; s2n_blocked_status blocked = S2N_NOT_BLOCKED; size_t bytes_written = 0; POSIX_GUARD_RESULT(s2n_ktls_sendmsg(io_context, record_type, &iov, 1, &blocked, &bytes_written)); POSIX_ENSURE_LTE(bytes_written, len); return bytes_written; } int s2n_ktls_record_writev(struct s2n_connection *conn, uint8_t content_type, const struct iovec *in, int in_count, size_t offs, size_t to_write) { POSIX_ENSURE_REF(conn); POSIX_ENSURE(in_count > 0, S2N_ERR_INVALID_ARGUMENT); size_t count = in_count; POSIX_ENSURE_REF(in); /* Currently, ktls only supports sending alerts. * To also support handshake messages, we would need a way to track record_type. * We could add a field to the send io context. */ POSIX_ENSURE(content_type == TLS_ALERT, S2N_ERR_UNIMPLEMENTED); /* When stuffers automatically resize, they allocate a potentially large * chunk of memory to avoid repeated resizes. * Since ktls only uses conn->out for control messages (alerts and eventually * handshake messages), we expect infrequent small writes with conn->out * freed in between. Since we're therefore more concerned with the size of * the allocation than the frequency, use a more accurate size for each write. */ POSIX_GUARD(s2n_stuffer_resize_if_empty(&conn->out, to_write)); POSIX_GUARD(s2n_stuffer_writev_bytes(&conn->out, in, count, offs, to_write)); return to_write; } int s2n_sendfile(struct s2n_connection *conn, int in_fd, off_t offset, size_t count, size_t *bytes_written, s2n_blocked_status *blocked) { POSIX_ENSURE_REF(blocked); *blocked = S2N_BLOCKED_ON_WRITE; POSIX_ENSURE_REF(bytes_written); *bytes_written = 0; POSIX_ENSURE_REF(conn); POSIX_ENSURE(conn->ktls_send_enabled, S2N_ERR_KTLS_UNSUPPORTED_CONN); POSIX_GUARD_RESULT(s2n_ktls_check_estimated_record_limit(conn, count)); int out_fd = 0; POSIX_GUARD_RESULT(s2n_ktls_get_file_descriptor(conn, S2N_KTLS_MODE_SEND, &out_fd)); #ifdef S2N_LINUX_SENDFILE /* https://man7.org/linux/man-pages/man2/sendfile.2.html */ ssize_t result = 0; S2N_IO_RETRY_EINTR(result, sendfile(out_fd, in_fd, &offset, count)); POSIX_GUARD_RESULT(s2n_io_check_write_result(result)); *bytes_written = result; #else POSIX_BAIL(S2N_ERR_UNIMPLEMENTED); #endif POSIX_GUARD_RESULT(s2n_ktls_set_estimated_sequence_number(conn, *bytes_written)); *blocked = S2N_NOT_BLOCKED; return S2N_SUCCESS; } int s2n_ktls_read_full_record(struct s2n_connection *conn, uint8_t *record_type) { POSIX_ENSURE_REF(conn); POSIX_ENSURE_REF(record_type); /* If any unread data remains in conn->in, it must be application data that * couldn't be returned due to the size of the application's provided buffer. */ if (s2n_stuffer_data_available(&conn->in)) { *record_type = TLS_APPLICATION_DATA; return S2N_SUCCESS; } POSIX_GUARD(s2n_stuffer_resize_if_empty(&conn->buffer_in, S2N_DEFAULT_FRAGMENT_LENGTH)); struct s2n_stuffer record_stuffer = conn->buffer_in; size_t len = s2n_stuffer_space_remaining(&record_stuffer); uint8_t *buf = s2n_stuffer_raw_write(&record_stuffer, len); POSIX_ENSURE_REF(buf); s2n_blocked_status blocked = S2N_NOT_BLOCKED; size_t bytes_read = 0; /* Since recvmsg is responsible for decrypting the record in ktls, * we apply blinding to the recvmsg call. */ s2n_result result = s2n_ktls_recvmsg(conn->recv_io_context, record_type, buf, len, &blocked, &bytes_read); WITH_ERROR_BLINDING(conn, POSIX_GUARD_RESULT(result)); POSIX_GUARD(s2n_stuffer_skip_write(&conn->buffer_in, bytes_read)); /* We don't care about returning a full fragment because we don't need to decrypt. * kTLS handled decryption already. * So we can always set conn->in equal to the full buffer_in. */ POSIX_GUARD_RESULT(s2n_recv_in_init(conn, bytes_read, bytes_read)); return S2N_SUCCESS; }