/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #ifndef KOLMOGOROV_SMIRNOV_IMPL_HPP_ #define KOLMOGOROV_SMIRNOV_IMPL_HPP_ namespace datasketches { // type resolver template kll_quantile_calculator make_quantile_calculator(const kll_sketch& sketch) { return kll_quantile_calculator(sketch); } template double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) { using Comparator = typename Sketch::comparator; auto calc1 = make_quantile_calculator(sketch1); auto calc2 = make_quantile_calculator(sketch2); auto it1 = calc1.begin(); auto it2 = calc2.begin(); const auto n1 = sketch1.get_n(); const auto n2 = sketch2.get_n(); double delta = 0; while (it1 != calc1.end() && it2 != calc2.end()) { const double norm_cum_wt1 = static_cast((*it1).second) / n1; const double norm_cum_wt2 = static_cast((*it2).second) / n2; delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2)); if (Comparator()((*it1).first, (*it2).first)) { ++it1; } else if (Comparator()((*it2).first, (*it1).first)) { ++it2; } else { ++it1; ++it2; } } const double norm_cum_wt1 = it1 == calc1.end() ? 1 : static_cast((*it1).second) / n1; const double norm_cum_wt2 = it2 == calc2.end() ? 1 : static_cast((*it2).second) / n2; delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2)); return delta; } template double kolmogorov_smirnov::threshold(const Sketch& sketch1, const Sketch& sketch2, double p) { const double r1 = sketch1.get_num_retained(); const double r2 = sketch2.get_num_retained(); const double alpha_factor = sqrt(-0.5 * log(0.5 * p)); const double delta_area_threshold = alpha_factor * sqrt((r1 + r2) / (r1 * r2)); const double eps1 = sketch1.get_normalized_rank_error(false); const double eps2 = sketch2.get_normalized_rank_error(false); return delta_area_threshold + eps1 + eps2; } template bool kolmogorov_smirnov::test(const Sketch& sketch1, const Sketch& sketch2, double p) { return delta(sketch1, sketch2) > threshold(sketch1, sketch2, p); } } /* namespace datasketches */ #endif