/*
 * Copyright (c) 2015-2019 Contributors as noted in the AUTHORS file
 *
 * This file is part of Solo5, a sandboxed execution environment.
 *
 * Permission to use, copy, modify, and/or distribute this software
 * for any purpose with or without fee is hereby granted, provided
 * that the above copyright notice and this permission notice appear
 * in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * hvt_core.c: Core functionality.
 *
 * Maintains tables of modules, hypercall handlers and vmexit handlers.
 * Implements core hypercall functionality which is always present.
 */

#define _GNU_SOURCE
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#if defined(__linux__)
#include <sys/epoll.h>
#include <sys/timerfd.h>
#elif defined(__FreeBSD__) || defined(__OpenBSD__)
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#else
#error Unsupported target
#endif

#include "hvt.h"

hvt_hypercall_fn_t hvt_core_hypercalls[HVT_HYPERCALL_MAX] = { 0 };

int hvt_core_register_hypercall(int nr, hvt_hypercall_fn_t fn)
{
    if (nr >= HVT_HYPERCALL_MAX)
        return -1;
    if (hvt_core_hypercalls[nr] != NULL)
        return -1;

    hvt_core_hypercalls[nr] = fn;
    return 0;
}

#define HVT_HALT_HOOKS_MAX 8
hvt_halt_fn_t hvt_core_halt_hooks[HVT_HALT_HOOKS_MAX] = { 0 };
static int nr_halt_hooks;

int hvt_core_register_halt_hook(hvt_halt_fn_t fn)
{
    if (nr_halt_hooks == HVT_HALT_HOOKS_MAX)
        return -1;

    hvt_core_halt_hooks[nr_halt_hooks] = fn;
    nr_halt_hooks++;
    return 0;
}
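/*
 * Usage sketch (illustrative only, not part of this file): a module that
 * wants to run cleanup when the guest halts registers a hook from its setup
 * function, and receives the guest's exit status plus the optional cookie
 * validated by hvt_core_hypercall_halt() below. The names "mymod_halt" and
 * "mymod_setup" are hypothetical:
 *
 *     static void mymod_halt(struct hvt *hvt, int status, void *cookie)
 *     {
 *         // status is the guest's exit status; cookie is either NULL or a
 *         // host pointer to guest memory of up to HVT_HALT_COOKIE_MAX bytes.
 *     }
 *
 *     static int mymod_setup(struct hvt *hvt, struct mft *mft)
 *     {
 *         return hvt_core_register_halt_hook(mymod_halt);
 *     }
 */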
int hvt_core_hypercall_halt(struct hvt *hvt, hvt_gpa_t gpa)
{
    void *cookie;
    int idx;
    struct hvt_hc_halt *t =
        HVT_CHECKED_GPA_P(hvt, gpa, sizeof (struct hvt_hc_halt));

    /*
     * If the guest set a non-NULL cookie (non-zero before conversion),
     * verify that the memory space pointed to by it is accessible and pass
     * it down to halt hooks, if any.
     */
    if (t->cookie != 0)
        cookie = HVT_CHECKED_GPA_P(hvt, t->cookie, HVT_HALT_COOKIE_MAX);
    else
        cookie = NULL;

    for (idx = 0; idx < nr_halt_hooks; idx++) {
        hvt_halt_fn_t fn = hvt_core_halt_hooks[idx];
        assert(fn != NULL);
        fn(hvt, t->exit_status, cookie);
    }

    return t->exit_status;
}

#define NUM_MODULES 8
hvt_vmexit_fn_t hvt_core_vmexits[NUM_MODULES + 1] = { 0 };
static int nvmexits = 0;

int hvt_core_register_vmexit(hvt_vmexit_fn_t fn)
{
    if (nvmexits == NUM_MODULES)
        return -1;

    hvt_core_vmexits[nvmexits] = fn;
    nvmexits++;
    return 0;
}

static void hypercall_walltime(struct hvt *hvt, hvt_gpa_t gpa)
{
    struct hvt_hc_walltime *t =
        HVT_CHECKED_GPA_P(hvt, gpa, sizeof (struct hvt_hc_walltime));
    struct timespec ts;

    int rc = clock_gettime(CLOCK_REALTIME, &ts);
    assert(rc == 0);
    t->nsecs = (ts.tv_sec * 1000000000ULL) + ts.tv_nsec;
}

static void hypercall_puts(struct hvt *hvt, hvt_gpa_t gpa)
{
    struct hvt_hc_puts *p =
        HVT_CHECKED_GPA_P(hvt, gpa, sizeof (struct hvt_hc_puts));

    int rc = write(1, HVT_CHECKED_GPA_P(hvt, p->data, p->len), p->len);
    assert(rc >= 0);
}

static int waitsetfd = -1;
static int npollfds;
#if defined(__linux__)
static int timerfd = -1;
#define INTERNAL_TIMERFD (~1U)
#endif

static void setup_waitset(void)
{
#if defined(__linux__)
    waitsetfd = epoll_create1(0);
    if (waitsetfd == -1)
        err(1, "Could not create wait set");
    timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
    if (timerfd == -1)
        err(1, "Could not create wait set timerfd");
    struct epoll_event ev;
    ev.events = EPOLLIN;
    ev.data.u64 = INTERNAL_TIMERFD;
    if (epoll_ctl(waitsetfd, EPOLL_CTL_ADD, timerfd, &ev) == -1)
        err(1, "epoll_ctl(EPOLL_CTL_ADD) failed");
#else /* kqueue */
    waitsetfd = kqueue();
    if (waitsetfd == -1)
        err(1, "Could not create wait set");
#endif
}

int hvt_core_register_pollfd(int fd, uintptr_t waitset_data)
{
    if (waitsetfd == -1)
        setup_waitset();

#if defined(__linux__)
    struct epoll_event ev;
    ev.events = EPOLLIN;
    /*
     * waitset_data is a solo5_handle_t, and will be returned by
     * epoll_pwait() as part of any received event.
     */
    ev.data.u64 = waitset_data;
    if (epoll_ctl(waitsetfd, EPOLL_CTL_ADD, fd, &ev) == -1)
        err(1, "epoll_ctl(EPOLL_CTL_ADD) failed");
#else /* kqueue */
    struct kevent ev;
    /*
     * waitset_data is a solo5_handle_t, and will be returned by kevent() as
     * part of any received event.
     */
    EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, (void *)waitset_data);
    if (kevent(waitsetfd, &ev, 1, NULL, 0, NULL) == -1)
        err(1, "kevent(EV_ADD) failed");
#endif

    npollfds++;
    return 0;
}
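/*
 * Usage sketch (illustrative only): a device module that has opened a host
 * file descriptor, e.g. a tap device for network I/O, attaches it to the
 * wait set keyed by the device's Solo5 handle. That handle is what shows up
 * in the ready_set computed by hypercall_poll() below. The names "tap_fd"
 * and "mft_index" are hypothetical:
 *
 *     // Guest will see (1ULL << mft_index) set in ready_set when tap_fd
 *     // becomes readable.
 *     hvt_core_register_pollfd(tap_fd, mft_index);
 */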
static void hypercall_poll(struct hvt *hvt, hvt_gpa_t gpa)
{
    struct hvt_hc_poll *t =
        HVT_CHECKED_GPA_P(hvt, gpa, sizeof (struct hvt_hc_poll));

#if defined(__linux__)
    /*
     * On Linux, in order to support nanosecond timeouts, as defined by the
     * Solo5 API, we use a timerfd internally in the waitset. Account for
     * this in the number of requested events.
     */
    int nevents = npollfds ? (npollfds + 1) : 1;
    int nrevents;
    uint64_t ready_set = 0;
    struct epoll_event revents[nevents];
    struct itimerspec it = {
        .it_interval = { 0 },
        .it_value = {
            .tv_sec = t->timeout_nsecs / 1000000000ULL,
            .tv_nsec = t->timeout_nsecs % 1000000000ULL
        }
    };
    /*
     * Ensure that it.it_value is always non-zero, otherwise the following
     * epoll_pwait() will hang if there are no other descriptors in the
     * waitset due to the timer never firing. See timerfd_settime(2).
     */
    it.it_value.tv_nsec |= 1;
    if (timerfd_settime(timerfd, 0, &it, NULL) == -1)
        err(1, "timerfd_settime() failed");

    /*
     * We can always safely restart this call on EINTR, since the internal
     * timerfd is independent of its invocation.
     */
    do {
        nrevents = epoll_pwait(waitsetfd, revents, nevents, -1, NULL);
    } while (nrevents == -1 && errno == EINTR);
    if (nrevents > 0) {
        int orig_nrevents = nrevents;
        for (int i = 0; i < orig_nrevents; i++)
            if (revents[i].data.u64 == INTERNAL_TIMERFD)
                nrevents -= 1;      /* Disregard in total reported events */
            else
                ready_set |= (1ULL << revents[i].data.u64);
    }
    assert(nrevents >= 0);
#else /* kqueue */
    /*
     * At least one event must be requested in kevent(), otherwise the call
     * will return immediately or fail.
     */
    int nevents = npollfds ? npollfds : 1;
    int nrevents;
    uint64_t ready_set = 0;
    struct kevent revents[nevents];
    struct timespec ts;
    ts.tv_sec = t->timeout_nsecs / 1000000000ULL;
    ts.tv_nsec = t->timeout_nsecs % 1000000000ULL;
    nrevents = kevent(waitsetfd, NULL, 0, revents, nevents, &ts);
    /*
     * Unlike the epoll implementation, we can't easily restart the kevent()
     * call on EINTR, due to not having a straightforward way to recalculate
     * the timeout. While we could use EVFILT_TIMER similarly to the Linux
     * timerfd, this has system-wide limits on the number of active timers.
     *
     * However: We don't handle any signals, other than by terminating the
     * tender. Therefore, we should never see EINTR in practice here. If this
     * turns out not to be the case, prominently warn the user about it and
     * pretend we woke up early with no events, which is better than just
     * asserting/aborting.
     */
    if (nrevents == -1 && errno == EINTR) {
        warnx("hypercall_poll(): kqueue() returned EINTR");
        warnx("hypercall_poll(): This should not happen, please report a bug");
        nrevents = 0;
    }
    assert(nrevents >= 0);
    if (nrevents > 0) {
        for (int i = 0; i < nrevents; i++)
            ready_set |= (1ULL << (uintptr_t)revents[i].udata);
    }
#endif

    t->ready_set = ready_set;
    t->ret = nrevents;
}

static int setup(struct hvt *hvt, struct mft *mft)
{
    if (waitsetfd == -1)
        setup_waitset();

    assert(hvt_core_register_hypercall(HVT_HYPERCALL_WALLTIME,
                hypercall_walltime) == 0);
    assert(hvt_core_register_hypercall(HVT_HYPERCALL_PUTS,
                hypercall_puts) == 0);
    assert(hvt_core_register_hypercall(HVT_HYPERCALL_POLL,
                hypercall_poll) == 0);

    return 0;
}

DECLARE_MODULE(core,
    .setup = setup
)
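/*
 * Module sketch (illustrative only): other tender modules follow the same
 * pattern as the core module above, wiring their hypercall handlers and
 * pollable descriptors into the tables maintained here from their setup
 * function. The names "mymod", "HVT_HYPERCALL_MYMOD_IO", "hypercall_mymod_io",
 * "mymod_fd" and "mymod_handle" are hypothetical:
 *
 *     static int mymod_setup(struct hvt *hvt, struct mft *mft)
 *     {
 *         if (hvt_core_register_hypercall(HVT_HYPERCALL_MYMOD_IO,
 *                     hypercall_mymod_io) == -1)
 *             return -1;
 *         return hvt_core_register_pollfd(mymod_fd, mymod_handle);
 *     }
 *
 *     DECLARE_MODULE(mymod,
 *         .setup = mymod_setup
 *     )
 */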