/* Copyright (c) 2017-2018 Hans-Kristian Arntzen * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "wsi_timing.hpp" #include "wsi.hpp" #include #include #include #ifndef _WIN32 #include #endif namespace Vulkan { void WSITiming::init(WSIPlatform *platform, VkDevice device, VkSwapchainKHR swapchain, const WSITimingOptions &options) { this->platform = platform; this->device = device; this->swapchain = swapchain; this->options = options; serial = {}; pacing = {}; last_frame = {}; feedback = {}; smoothing = {}; feedback.timing_buffer.resize(64); } void WSITiming::set_debug_enable(bool enable) { options.debug = enable; } void WSITiming::set_latency_limiter(LatencyLimiter limiter) { options.latency_limiter = limiter; } const WSITimingOptions &WSITiming::get_options() const { return options; } void WSITiming::set_swap_interval(unsigned interval) { if (interval == options.swap_interval || interval == 0) return; // First, extrapolate to our current serial so we can make a more correct target time using the new swap interval. uint64_t target = compute_target_present_time_for_serial(serial.serial); if (target) { pacing.base_serial = serial.serial; pacing.base_present = target; } options.swap_interval = interval; } void WSITiming::update_refresh_interval() { VkRefreshCycleDurationGOOGLE refresh; if (vkGetRefreshCycleDurationGOOGLE(device, swapchain, &refresh) == VK_SUCCESS) { if (!feedback.refresh_interval || options.debug) LOGI("Observed refresh rate: %.6f Hz.\n", 1e9 / refresh.refreshDuration); feedback.refresh_interval = refresh.refreshDuration; } else LOGE("Failed to get refresh cycle duration.\n"); } WSITiming::Timing *WSITiming::find_latest_timestamp(uint32_t start_serial) { for (uint32_t i = 1; i < NUM_TIMINGS - 1; i++) { uint32_t past_serial = start_serial - i; auto &past = feedback.past_timings[past_serial & NUM_TIMING_MASK]; if (past.wall_serial == past_serial && past.timing.actualPresentTime != 0) return &past; } return nullptr; } void WSITiming::update_past_presentation_timing() { // Update past presentation timings. uint32_t presentation_count; if (vkGetPastPresentationTimingGOOGLE(device, swapchain, &presentation_count, nullptr) != VK_SUCCESS) return; if (presentation_count) { if (presentation_count > feedback.timing_buffer.size()) feedback.timing_buffer.resize(presentation_count); auto res = vkGetPastPresentationTimingGOOGLE(device, swapchain, &presentation_count, feedback.timing_buffer.data()); // I have a feeling this is racy in nature and we might have received another presentation timing in-between // querying count and getting actual data, so accept INCOMPLETE here. if (res == VK_SUCCESS || res == VK_INCOMPLETE) { for (uint32_t i = 0; i < presentation_count; i++) { auto &t = feedback.past_timings[feedback.timing_buffer[i].presentID & NUM_TIMING_MASK]; if (t.wall_serial == feedback.timing_buffer[i].presentID) { t.timing = feedback.timing_buffer[i]; uint64_t gpu_done_time = (t.timing.earliestPresentTime - t.timing.presentMargin); t.slack = int64_t(t.timing.actualPresentTime - gpu_done_time); t.pipeline_latency = int64_t(gpu_done_time - t.wall_frame_begin); // Expected result unless proven otherwise. t.result = TimingResult::Expected; // Feed the heuristics on when to drop frame rate or promote it. if ((feedback.refresh_interval != 0) && (t.timing.earliestPresentTime < t.timing.actualPresentTime) && (t.timing.presentMargin > feedback.refresh_interval / 4)) { // We could have presented earlier, and we had decent GPU margin to do so. // Deal with frame dropping later. t.result = TimingResult::VeryEarly; if (options.debug) LOGI("Frame completed very early, but was held back by swap interval!\n"); } } update_frame_pacing(t.wall_serial, t.timing.actualPresentTime, false); } } } auto *timing = find_latest_timestamp(serial.serial); if (timing && timing->timing.actualPresentTime >= timing->wall_frame_begin) { auto total_latency = timing->timing.actualPresentTime - timing->wall_frame_begin; feedback.latency = 0.99 * feedback.latency + 0.01e-9 * total_latency; if (options.debug) { LOGI("Have presentation timing for %u frames in the past.\n", serial.serial - timing->timing.presentID); } if (int64_t(timing->timing.presentMargin) < 0) LOGE("Present margin is negative (%lld) ... ?!\n", static_cast(timing->timing.presentMargin)); if (timing->timing.earliestPresentTime > timing->timing.actualPresentTime) LOGE("Earliest present time is > actual present time ... Bug?\n"); if (timing->timing.actualPresentTime < timing->timing.desiredPresentTime) { LOGE("Image was presented before desired present time, bug? (actual: %llu, desired: %llu)\n", static_cast(timing->timing.actualPresentTime), static_cast(timing->timing.desiredPresentTime)); } else if (feedback.refresh_interval != 0 && timing->timing.desiredPresentTime != 0) { uint64_t delta = timing->timing.actualPresentTime - timing->timing.desiredPresentTime; if (delta >= feedback.refresh_interval) { LOGE("*** Image was presented %u frames late " "(target: %.3f ms, rounded target: %.3f ms, actual: %.3f ms) compared to desired target. " "This normally happens in startup phase, but otherwise it's either a real hitch or app bug. ***\n", unsigned(delta / feedback.refresh_interval), timing->wall_frame_target * 1e-6, timing->timing.desiredPresentTime * 1e-6, timing->timing.actualPresentTime * 1e-6); } } // How much can we squeeze latency? if (options.debug) LOGI("Total latency: %.3f ms, slack time: %.3f\n", total_latency * 1e-6, timing->slack * 1e-6); if (last_frame.serial && timing->wall_serial != last_frame.serial) { double frame_time_ns = double(timing->timing.actualPresentTime - last_frame.present_time) / double(timing->wall_serial - last_frame.serial); // We only detect a hitch if we have the same swap interval target, // otherwise it might as well just be a transient state thing. if ((timing->swap_interval_target == options.swap_interval) && (feedback.refresh_interval != 0) && (frame_time_ns > 1.1 * options.swap_interval * feedback.refresh_interval)) { LOGE("*** HITCH DETECTED ***\n"); timing->result = TimingResult::TooLate; if (platform) { unsigned frame_delta = unsigned(round(frame_time_ns / (options.swap_interval * feedback.refresh_interval))); VK_ASSERT(frame_delta); unsigned dropped_frames = frame_delta - 1; platform->event_display_timing_stutter(serial.serial, timing->wall_serial, dropped_frames); } } if (options.debug) { LOGI("Frame time ID #%u: %.3f ms\n", timing->wall_serial, 1e-6 * frame_time_ns); } } last_frame.serial = timing->wall_serial; last_frame.present_time = timing->timing.actualPresentTime; } } void WSITiming::wait_until(int64_t nsecs) { #ifndef _WIN32 timespec ts; ts.tv_sec = nsecs / 1000000000; ts.tv_nsec = nsecs % 1000000000; clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, nullptr); #else (void)nsecs; #endif } uint64_t WSITiming::get_wall_time() { #ifndef _WIN32 // GOOGLE_display_timing on Linux and Android use CLOCK_MONOTONIC explicitly. timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_sec * 1000000000ull + ts.tv_nsec; #else return 0; #endif } void WSITiming::update_frame_pacing(uint32_t serial, uint64_t present_time, bool wall_time) { if (wall_time && !pacing.have_real_estimate) { // We don't have a refresh interval yet, just update the estimate from CPU. pacing.base_serial = serial; pacing.base_present = present_time; pacing.have_estimate = true; return; } else if (!wall_time && !pacing.have_real_estimate) { pacing.base_serial = serial; pacing.base_present = present_time; pacing.have_real_estimate = true; return; } else if (wall_time) { // We already have a correct estimate, don't update. return; } if (!feedback.refresh_interval) { // If we don't have a refresh interval yet, we cannot estimate anything. // What we can do instead is just to blindly use the latest observed timestamp as our guiding hand. if (present_time > pacing.base_present) { pacing.base_serial = serial; pacing.base_present = present_time; } } else { int32_t frame_dist = int32_t(serial - pacing.base_serial); // Don't update with the past. if (frame_dist <= 0) return; // Extrapolate timing from current. uint64_t extrapolated_present_time = pacing.base_present + feedback.refresh_interval * options.swap_interval * (serial - pacing.base_serial); int64_t error = std::abs(int64_t(extrapolated_present_time - present_time)); // If the delta is close enough (expected frame pace), // update the base ID, so we can make more accurate future estimates. // This is relevant if we want to dynamically change swap interval. // If present time is significantly larger than extrapolated time, // we can assume we had a dropped frame, so we also need to update our base estimate. if ((present_time > extrapolated_present_time) || (error < int64_t(feedback.refresh_interval / 2))) { // We must have dropped frames, or similar. // Update our base estimate. pacing.base_serial = serial; pacing.base_present = present_time; if (options.debug) { LOGI("Updating frame pacing base to serial: %u (delta: %.3f ms)\n", pacing.base_serial, 1e-6 * int64_t(present_time - extrapolated_present_time)); } } } } void WSITiming::update_frame_time_smoothing(double &frame_time, double &elapsed_time) { double target_frame_time = frame_time; if (feedback.refresh_interval) target_frame_time = double(options.swap_interval * feedback.refresh_interval) * 1e-9; double actual_elapsed = elapsed_time - smoothing.offset; smoothing.elapsed += target_frame_time; double delta = actual_elapsed - smoothing.elapsed; if (delta > std::fabs(target_frame_time * 4.0)) { // We're way off, something must have happened, reset the smoothing. // Don't jump skip the frame time, other than keeping the frame_time as-is. // We might have had a natural pause, and it doesn't make sense to report absurd frame times. // Apps needing to sync to wall time over time could use elapsed_time as a guiding hand. if (options.debug) LOGI("Detected discontinuity in smoothing algorithm!\n"); smoothing.offset = elapsed_time; smoothing.elapsed = 0.0; return; } double jitter_offset = 0.0; // Accept up to 0.5% jitter to catch up or slow down smoothly to our target elapsed time. if (delta > 0.1 * target_frame_time) jitter_offset = 0.005 * target_frame_time; else if (delta < -0.1 * target_frame_time) jitter_offset = -0.005 * target_frame_time; target_frame_time += jitter_offset; smoothing.elapsed += jitter_offset; elapsed_time = smoothing.elapsed + smoothing.offset; frame_time = target_frame_time; } uint64_t WSITiming::get_refresh_interval() const { return feedback.refresh_interval; } double WSITiming::get_current_latency() const { return feedback.latency; } void WSITiming::limit_latency(Timing &new_timing) { if (options.latency_limiter != LatencyLimiter::None) { // Try to squeeze timings by sleeping, quite shaky, but very fun :) if (feedback.refresh_interval) { int64_t target = int64_t(compute_target_present_time_for_serial(serial.serial)); if (options.latency_limiter == LatencyLimiter::AdaptiveLowLatency) { int64_t latency = 0; if (get_conservative_latency(latency)) { // Keep quarter frame as buffer in case this frame is heavier than normal. latency += feedback.refresh_interval >> 2; wait_until(target - latency); uint64_t old_time = new_timing.wall_frame_begin; new_timing.wall_frame_begin = get_wall_time(); if (options.debug) { LOGI("Slept for %.3f ms for latency tuning.\n", 1e-6 * (new_timing.wall_frame_begin - old_time)); } } } else if (options.latency_limiter == LatencyLimiter::IdealPipeline) { // In the ideal pipeline we have one frame for CPU to work, // then one frame for GPU to work in parallel, so we should strive for a little under 2 frames of latency here. // The assumption is that we can kick some work to GPU at least mid-way through our frame, // which will become our slack factor. int64_t latency = feedback.refresh_interval * 2; wait_until(target - latency); uint64_t old_time = new_timing.wall_frame_begin; new_timing.wall_frame_begin = get_wall_time(); if (options.debug) { LOGI("Slept for %.3f ms for latency tuning.\n", 1e-6 * (new_timing.wall_frame_begin - old_time)); } } } } } void WSITiming::begin_frame(double &frame_time, double &elapsed_time) { promote_or_demote_frame_rate(); // Update initial frame elapsed estimate, // from here, we'll try to lock the frame time to refresh_rate +/- epsilon. if (serial.serial == 0) { smoothing.offset = elapsed_time; smoothing.elapsed = 0.0; } serial.serial++; if (options.debug) LOGI("Starting WSITiming frame serial: %u\n", serial.serial); // On X11, this is found over time by observation, so we need to adapt it. // Only after we have observed the refresh cycle duration, we can start syncing against it. if ((serial.serial & 7) == 0) update_refresh_interval(); auto &new_timing = feedback.past_timings[serial.serial & NUM_TIMING_MASK]; new_timing.wall_serial = serial.serial; new_timing.wall_frame_begin = get_wall_time(); new_timing.swap_interval_target = options.swap_interval; new_timing.result = TimingResult::Unknown; new_timing.timing = {}; update_past_presentation_timing(); // Absolute minimum case, just get some initial data before we have some real estimates. update_frame_pacing(serial.serial, new_timing.wall_frame_begin, true); update_frame_time_smoothing(frame_time, elapsed_time); limit_latency(new_timing); new_timing.wall_frame_target = compute_target_present_time_for_serial(serial.serial); } bool WSITiming::get_conservative_latency(int64_t &latency) const { latency = 0; unsigned valid_latencies = 0; for (auto &timing : feedback.past_timings) { if (timing.timing.actualPresentTime >= timing.wall_frame_begin) { latency = std::max(latency, timing.pipeline_latency); valid_latencies++; } } return valid_latencies > (NUM_TIMINGS / 2); } uint64_t WSITiming::compute_target_present_time_for_serial(uint32_t serial) { if (!pacing.have_estimate) return 0; uint64_t frame_delta = serial - pacing.base_serial; frame_delta *= options.swap_interval; uint64_t target = pacing.base_present + feedback.refresh_interval * frame_delta; return target; } void WSITiming::promote_or_demote_frame_rate() { if (!options.adaptive_swap_interval) return; if (feedback.refresh_interval == 0) return; // Analyze if we should do something with frame rate. // The heuristic is something like: // - If we observe at least 3 hitches the last window of timing events, demote frame rate. // - If we observe consistent earliestPresent < actualPresent and presentMargin is at least a quarter frame, // promote frame rate. // We can only make an analysis if all timings come from same swap interval. // This also limits how often we can automatically change frame rate. unsigned frame_drops = 0; unsigned early_frames = 0; unsigned total = 0; for (auto &timing : feedback.past_timings) { if (timing.result == TimingResult::Unknown) continue; if (options.swap_interval != timing.swap_interval_target) return; total++; if (timing.result == TimingResult::VeryEarly) early_frames++; else if (timing.result == TimingResult::TooLate) frame_drops++; } // Don't have enough data. if (total < NUM_TIMINGS / 2) return; if (early_frames == total && options.swap_interval > 1) { // We can go down in swap interval, great! set_swap_interval(options.swap_interval - 1); LOGI("Adjusted swap interval down to %u!\n", options.swap_interval); } else if (frame_drops >= 3) { if (options.swap_interval < 8) // swap interval of 8, lol { set_swap_interval(options.swap_interval + 1); LOGI("Too much hitching detected, increasing swap interval to %u!\n", options.swap_interval); } else LOGI("Still detecting hitching, but reached swap interval limit.\n"); } } bool WSITiming::fill_present_info_timing(VkPresentTimeGOOGLE &time) { time.presentID = serial.serial; time.desiredPresentTime = compute_target_present_time_for_serial(serial.serial); // Want to set the desired target close enough, // but not exactly at estimated target, since we have a rounding error cliff. // Set the target a quarter frame away from real target. if (time.desiredPresentTime != 0 && feedback.refresh_interval != 0) time.desiredPresentTime -= feedback.refresh_interval >> 4; return true; } }