diff --git a/datasketches-cpp/fi/include/frequent_items_sketch.hpp b/datasketches-cpp/fi/include/frequent_items_sketch.hpp index 6efe2b9..b2a9b86 100644 --- a/datasketches-cpp/fi/include/frequent_items_sketch.hpp +++ b/datasketches-cpp/fi/include/frequent_items_sketch.hpp @@ -64,7 +64,7 @@ public: * @param lg_start_map_size Log2 of the starting physical size of the internal hash * map managed by this sketch. */ - explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A()); + explicit frequent_items_sketch(uint8_t lg_max_map_size, size_t hashset_addr, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A()); /** * Update this sketch with an item and a positive weight (frequency count). @@ -271,6 +271,9 @@ public: */ string to_string(bool print_items = false) const; + void set_weights(W total_weight, W offset) { this->total_weight = total_weight; this->offset = offset; } + W get_offset() const { return this->offset; } + private: static const uint8_t SERIAL_VERSION = 1; static const uint8_t FAMILY_ID = 10; diff --git a/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp b/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp index 593aa03..07d9ecf 100644 --- a/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +++ b/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp @@ -33,11 +33,12 @@ template const uint8_t frequent_items_sketch::LG_MIN_MAP_SIZE; template -frequent_items_sketch::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size, const A& allocator): +frequent_items_sketch::frequent_items_sketch(uint8_t lg_max_map_size, size_t hashset_addr, uint8_t lg_start_map_size, const A& allocator): total_weight(0), offset(0), map( std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), + hashset_addr, std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), allocator ) @@ -321,7 +322,7 @@ frequent_items_sketch frequent_items_sketch: sketch.offset = offset; } if (!is.good()) - throw std::runtime_error("error reading from std::istream"); + throw std::runtime_error("error reading from std::istream"); return sketch; } diff --git a/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp b/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp index fc4cd83..c667271 100644 --- a/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +++ b/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp @@ -39,7 +39,7 @@ public: using AllocV = typename std::allocator_traits::template rebind_alloc; using AllocU16 = typename std::allocator_traits::template rebind_alloc; - reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size, const A& allocator); + reverse_purge_hash_map(uint8_t lg_size, size_t hashset_addr, uint8_t lg_max_size, const A& allocator); reverse_purge_hash_map(const reverse_purge_hash_map& other); reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept; ~reverse_purge_hash_map(); @@ -66,6 +66,7 @@ private: static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge A allocator_; + size_t hashset_addr_; uint8_t lg_cur_size_; uint8_t lg_max_size_; uint32_t num_active_; diff --git a/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp b/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp index 0b05d89..eeb0158 100644 --- a/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +++ b/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp @@ -27,6 +27,8 @@ #include "MurmurHash3.h" +void remove_from_hashset(size_t,size_t) noexcept; + namespace datasketches { // clang++ seems to require this declaration for CMAKE_BUILD_TYPE='Debug" @@ -34,8 +36,9 @@ template constexpr uint32_t reverse_purge_hash_map::MAX_SAMPLE_SIZE; template -reverse_purge_hash_map::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size, const A& allocator): +reverse_purge_hash_map::reverse_purge_hash_map(uint8_t lg_cur_size, uintptr_t hashset_addr, uint8_t lg_max_size, const A& allocator): allocator_(allocator), +hashset_addr_(hashset_addr), lg_cur_size_(lg_cur_size), lg_max_size_(lg_max_size), num_active_(0), @@ -53,6 +56,7 @@ states_(nullptr) template reverse_purge_hash_map::reverse_purge_hash_map(const reverse_purge_hash_map& other): allocator_(other.allocator_), +hashset_addr_(other.hashset_addr_), lg_cur_size_(other.lg_cur_size_), lg_max_size_(other.lg_max_size_), num_active_(other.num_active_), @@ -81,6 +85,7 @@ states_(nullptr) template reverse_purge_hash_map::reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept: allocator_(std::move(other.allocator_)), +hashset_addr_(other.hashset_addr_), lg_cur_size_(other.lg_cur_size_), lg_max_size_(other.lg_max_size_), num_active_(other.num_active_), @@ -245,6 +250,7 @@ void reverse_purge_hash_map::hash_delete(uint32_t delete_index) { // item to move to this location // if none are found, the status is changed states_[delete_index] = 0; // mark as empty + remove_from_hashset(hashset_addr_, keys_[delete_index]); keys_[delete_index].~K(); uint16_t drift = 1; const uint32_t mask = (1 << lg_cur_size_) - 1;