/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #ifndef _HLLSKETCH_INTERNAL_HPP_ #define _HLLSKETCH_INTERNAL_HPP_ #include "hll.hpp" #include "HllUtil.hpp" #include "HllSketchImplFactory.hpp" #include "CouponList.hpp" #include "HllArray.hpp" #include "common_defs.hpp" #include #include #include #include #include #include namespace datasketches { typedef union { int64_t longBytes; double doubleBytes; } longDoubleUnion; template hll_sketch_alloc::hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type, bool start_full_size, const A& allocator) { HllUtil::checkLgK(lg_config_k); if (start_full_size) { sketch_impl = HllSketchImplFactory::newHll(lg_config_k, tgt_type, start_full_size, allocator); } else { typedef typename std::allocator_traits::template rebind_alloc> clAlloc; sketch_impl = new (clAlloc(allocator).allocate(1)) CouponList(lg_config_k, tgt_type, hll_mode::LIST, allocator); } } template hll_sketch_alloc hll_sketch_alloc::deserialize(std::istream& is, const A& allocator) { HllSketchImpl* impl = HllSketchImplFactory::deserialize(is, allocator); return hll_sketch_alloc(impl); } template hll_sketch_alloc hll_sketch_alloc::deserialize(const void* bytes, size_t len, const A& allocator) { HllSketchImpl* impl = HllSketchImplFactory::deserialize(bytes, len, allocator); return hll_sketch_alloc(impl); } template hll_sketch_alloc::~hll_sketch_alloc() { if (sketch_impl != nullptr) { sketch_impl->get_deleter()(sketch_impl); } } template hll_sketch_alloc::hll_sketch_alloc(const hll_sketch_alloc& that) : sketch_impl(that.sketch_impl->copy()) {} template hll_sketch_alloc::hll_sketch_alloc(const hll_sketch_alloc& that, target_hll_type tgt_type) : sketch_impl(that.sketch_impl->copyAs(tgt_type)) {} template hll_sketch_alloc::hll_sketch_alloc(hll_sketch_alloc&& that) noexcept : sketch_impl(nullptr) { std::swap(sketch_impl, that.sketch_impl); } template hll_sketch_alloc::hll_sketch_alloc(HllSketchImpl* that) : sketch_impl(that) {} template hll_sketch_alloc hll_sketch_alloc::operator=(const hll_sketch_alloc& other) { sketch_impl->get_deleter()(sketch_impl); sketch_impl = other.sketch_impl->copy(); return *this; } template hll_sketch_alloc hll_sketch_alloc::operator=(hll_sketch_alloc&& other) { std::swap(sketch_impl, other.sketch_impl); return *this; } template void hll_sketch_alloc::reset() { // TODO: need to allow starting from a full-sized sketch // (either here or in other implementation) sketch_impl = sketch_impl->reset(); } template void hll_sketch_alloc::update(const std::string& datum) { if (datum.empty()) { return; } HashState hashResult; HllUtil::hash(datum.c_str(), datum.length(), DEFAULT_SEED, hashResult); coupon_update(HllUtil::coupon(hashResult)); } template void hll_sketch_alloc::update(uint64_t datum) { // no sign extension with 64 bits so no need to cast to signed value HashState hashResult; HllUtil::hash(&datum, sizeof(uint64_t), DEFAULT_SEED, hashResult); coupon_update(HllUtil::coupon(hashResult)); } template void hll_sketch_alloc::update(uint32_t datum) { update(static_cast(datum)); } template void hll_sketch_alloc::update(uint16_t datum) { update(static_cast(datum)); } template void hll_sketch_alloc::update(uint8_t datum) { update(static_cast(datum)); } template void hll_sketch_alloc::update(int64_t datum) { HashState hashResult; HllUtil::hash(&datum, sizeof(int64_t), DEFAULT_SEED, hashResult); coupon_update(HllUtil::coupon(hashResult)); } template void hll_sketch_alloc::update(int32_t datum) { const int64_t val = static_cast(datum); HashState hashResult; HllUtil::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult); coupon_update(HllUtil::coupon(hashResult)); } template void hll_sketch_alloc::update(int16_t datum) { const int64_t val = static_cast(datum); HashState hashResult; HllUtil::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult); coupon_update(HllUtil::coupon(hashResult)); } template void hll_sketch_alloc::update(int8_t datum) { const int64_t val = static_cast(datum); HashState hashResult; HllUtil::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult); coupon_update(HllUtil::coupon(hashResult)); } template void hll_sketch_alloc::update(double datum) { longDoubleUnion d; d.doubleBytes = static_cast(datum); if (datum == 0.0) { d.doubleBytes = 0.0; // canonicalize -0.0 to 0.0 } else if (std::isnan(d.doubleBytes)) { d.longBytes = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits() } HashState hashResult; HllUtil::hash(&d, sizeof(double), DEFAULT_SEED, hashResult); coupon_update(HllUtil::coupon(hashResult)); } template void hll_sketch_alloc::update(float datum) { longDoubleUnion d; d.doubleBytes = static_cast(datum); if (datum == 0.0) { d.doubleBytes = 0.0; // canonicalize -0.0 to 0.0 } else if (std::isnan(d.doubleBytes)) { d.longBytes = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits() } HashState hashResult; HllUtil::hash(&d, sizeof(double), DEFAULT_SEED, hashResult); coupon_update(HllUtil::coupon(hashResult)); } template void hll_sketch_alloc::update(const void* data, size_t lengthBytes) { if (data == nullptr) { return; } HashState hashResult; HllUtil::hash(data, lengthBytes, DEFAULT_SEED, hashResult); coupon_update(HllUtil::coupon(hashResult)); } template void hll_sketch_alloc::coupon_update(uint32_t coupon) { if (coupon == hll_constants::EMPTY) { return; } HllSketchImpl* result = this->sketch_impl->couponUpdate(coupon); if (result != this->sketch_impl) { this->sketch_impl->get_deleter()(this->sketch_impl); this->sketch_impl = result; } } template void hll_sketch_alloc::serialize_compact(std::ostream& os) const { return sketch_impl->serialize(os, true); } template void hll_sketch_alloc::serialize_updatable(std::ostream& os) const { return sketch_impl->serialize(os, false); } template vector_u8 hll_sketch_alloc::serialize_compact(unsigned header_size_bytes) const { return sketch_impl->serialize(true, header_size_bytes); } template vector_u8 hll_sketch_alloc::serialize_updatable() const { return sketch_impl->serialize(false, 0); } template string hll_sketch_alloc::to_string(const bool summary, const bool detail, const bool aux_detail, const bool all) const { std::basic_ostringstream, AllocChar> os; if (summary) { os << "### HLL sketch summary:" << std::endl << " Log Config K : " << get_lg_config_k() << std::endl << " Hll Target : " << type_as_string() << std::endl << " Current Mode : " << mode_as_string() << std::endl << " LB : " << get_lower_bound(1) << std::endl << " Estimate : " << get_estimate() << std::endl << " UB : " << get_upper_bound(1) << std::endl << " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl; if (get_current_mode() == HLL) { HllArray* hllArray = (HllArray*) sketch_impl; os << " CurMin : " << hllArray->getCurMin() << std::endl << " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl << " HipAccum : " << hllArray->getHipAccum() << std::endl << " KxQ0 : " << hllArray->getKxQ0() << std::endl << " KxQ1 : " << hllArray->getKxQ1() << std::endl; if (get_target_type() == HLL_4) { const Hll4Array* hll4_ptr = static_cast*>(sketch_impl); os << " Aux table? : " << (hll4_ptr->getAuxHashMap() != nullptr ? "true" : "false") << std::endl; } } else { os << " Coupon count : " << std::to_string(((CouponList*) sketch_impl)->getCouponCount()) << std::endl; } os << "### End HLL sketch summary" << std::endl; } if (detail) { os << "### HLL sketch data detail:" << std::endl; if (get_current_mode() == HLL) { const HllArray* hll_ptr = static_cast*>(sketch_impl); os << std::left << std::setw(10) << "Slot" << std::setw(6) << "Value" << std::endl; auto it = hll_ptr->begin(all); while (it != hll_ptr->end()) { os << std::setw(10) << HllUtil::getLow26(*it); os << std::setw(6) << HllUtil::getValue(*it); os << std::endl; ++it; } } else { const CouponList* list_ptr = static_cast*>(sketch_impl); os << std::left; os << std::setw(10) << "Index"; os << std::setw(10) << "Key"; os << std::setw(10) << "Slot"; os << std::setw(6) << "Value"; os << std::endl; auto it = list_ptr->begin(all); int i = 0; int mask = (1 << get_lg_config_k()) - 1; while (it != list_ptr->end()) { os << std::setw(10) << i; os << std::setw(10) << HllUtil::getLow26(*it); os << std::setw(10) << (HllUtil::getLow26(*it) & mask); os << std::setw(6) << HllUtil::getValue(*it); os << std::endl; ++it; ++i; } } os << "### End HLL sketch data detail" << std::endl; } if (aux_detail) { if ((get_current_mode() == HLL) && (get_target_type() == HLL_4)) { const Hll4Array* hll4_ptr = static_cast*>(sketch_impl); const AuxHashMap* aux_ptr = hll4_ptr->getAuxHashMap(); if (aux_ptr != nullptr) { os << "### HLL sketch aux detail:" << std::endl; os << std::left; os << std::setw(10) << "Index"; os << std::setw(10) << "Key"; os << std::setw(10) << "Slot"; os << std::setw(6) << "Value"; os << std::endl; auto it = aux_ptr->begin(all); int i = 0; int mask = (1 << get_lg_config_k()) - 1; while (it != aux_ptr->end()) { os << std::setw(10) << i; os << std::setw(10) << HllUtil::getLow26(*it); os << std::setw(10) << (HllUtil::getLow26(*it) & mask); os << std::setw(6) << HllUtil::getValue(*it); os << std::endl; ++it; ++i; } os << "### End HLL sketch aux detail" << std::endl; } } } return os.str(); } template double hll_sketch_alloc::get_estimate() const { return sketch_impl->getEstimate(); } template double hll_sketch_alloc::get_composite_estimate() const { return sketch_impl->getCompositeEstimate(); } template double hll_sketch_alloc::get_lower_bound(uint8_t numStdDev) const { return sketch_impl->getLowerBound(numStdDev); } template double hll_sketch_alloc::get_upper_bound(uint8_t numStdDev) const { return sketch_impl->getUpperBound(numStdDev); } template hll_mode hll_sketch_alloc::get_current_mode() const { return sketch_impl->getCurMode(); } template uint8_t hll_sketch_alloc::get_lg_config_k() const { return sketch_impl->getLgConfigK(); } template target_hll_type hll_sketch_alloc::get_target_type() const { return sketch_impl->getTgtHllType(); } template bool hll_sketch_alloc::is_out_of_order_flag() const { return sketch_impl->isOutOfOrderFlag(); } template bool hll_sketch_alloc::is_estimation_mode() const { return true; } template uint32_t hll_sketch_alloc::get_updatable_serialization_bytes() const { return sketch_impl->getUpdatableSerializationBytes(); } template uint32_t hll_sketch_alloc::get_compact_serialization_bytes() const { return sketch_impl->getCompactSerializationBytes(); } template bool hll_sketch_alloc::is_compact() const { return sketch_impl->isCompact(); } template bool hll_sketch_alloc::is_empty() const { return sketch_impl->isEmpty(); } template std::string hll_sketch_alloc::type_as_string() const { switch (sketch_impl->getTgtHllType()) { case target_hll_type::HLL_4: return std::string("HLL_4"); case target_hll_type::HLL_6: return std::string("HLL_6"); case target_hll_type::HLL_8: return std::string("HLL_8"); default: throw std::runtime_error("Sketch state error: Invalid target_hll_type"); } } template std::string hll_sketch_alloc::mode_as_string() const { switch (sketch_impl->getCurMode()) { case LIST: return std::string("LIST"); case SET: return std::string("SET"); case HLL: return std::string("HLL"); default: throw std::runtime_error("Sketch state error: Invalid hll_mode"); } } template uint32_t hll_sketch_alloc::get_max_updatable_serialization_bytes(uint8_t lg_config_k, const target_hll_type tgtHllType) { uint32_t arrBytes; if (tgtHllType == target_hll_type::HLL_4) { const uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[lg_config_k]; arrBytes = HllArray::hll4ArrBytes(lg_config_k) + auxBytes; } else if (tgtHllType == target_hll_type::HLL_6) { arrBytes = HllArray::hll6ArrBytes(lg_config_k); } else { //HLL_8 arrBytes = HllArray::hll8ArrBytes(lg_config_k); } return hll_constants::HLL_BYTE_ARR_START + arrBytes; } template double hll_sketch_alloc::get_rel_err(bool upperBound, bool unioned, uint8_t lg_config_k, uint8_t numStdDev) { return HllUtil::getRelErr(upperBound, unioned, lg_config_k, numStdDev); } } #endif // _HLLSKETCH_INTERNAL_HPP_