// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include #include "db/memtable.h" #include "memory/arena.h" #include "memtable/skiplist.h" #include "port/port.h" #include "rocksdb/memtablerep.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "rocksdb/utilities/options_type.h" #include "util/murmurhash.h" namespace ROCKSDB_NAMESPACE { namespace { class HashSkipListRep : public MemTableRep { public: HashSkipListRep(const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, size_t bucket_size, int32_t skiplist_height, int32_t skiplist_branching_factor); void Insert(KeyHandle handle) override; bool Contains(const char* key) const override; size_t ApproximateMemoryUsage() override; void Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) override; ~HashSkipListRep() override; MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override; MemTableRep::Iterator* GetDynamicPrefixIterator( Arena* arena = nullptr) override; private: friend class DynamicIterator; using Bucket = SkipList; size_t bucket_size_; const int32_t skiplist_height_; const int32_t skiplist_branching_factor_; // Maps slices (which are transformed user keys) to buckets of keys sharing // the same transform. std::atomic* buckets_; // The user-supplied transform whose domain is the user keys. const SliceTransform* transform_; const MemTableRep::KeyComparator& compare_; // immutable after construction Allocator* const allocator_; inline size_t GetHash(const Slice& slice) const { return MurmurHash(slice.data(), static_cast(slice.size()), 0) % bucket_size_; } inline Bucket* GetBucket(size_t i) const { return buckets_[i].load(std::memory_order_acquire); } inline Bucket* GetBucket(const Slice& slice) const { return GetBucket(GetHash(slice)); } // Get a bucket from buckets_. If the bucket hasn't been initialized yet, // initialize it before returning. Bucket* GetInitializedBucket(const Slice& transformed); class Iterator : public MemTableRep::Iterator { public: explicit Iterator(Bucket* list, bool own_list = true, Arena* arena = nullptr) : list_(list), iter_(list), own_list_(own_list), arena_(arena) {} ~Iterator() override { // if we own the list, we should also delete it if (own_list_) { assert(list_ != nullptr); delete list_; } } // Returns true iff the iterator is positioned at a valid node. bool Valid() const override { return list_ != nullptr && iter_.Valid(); } // Returns the key at the current position. // REQUIRES: Valid() const char* key() const override { assert(Valid()); return iter_.key(); } // Advances to the next position. // REQUIRES: Valid() void Next() override { assert(Valid()); iter_.Next(); } // Advances to the previous position. // REQUIRES: Valid() void Prev() override { assert(Valid()); iter_.Prev(); } // Advance to the first entry with a key >= target void Seek(const Slice& internal_key, const char* memtable_key) override { if (list_ != nullptr) { const char* encoded_key = (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, internal_key); iter_.Seek(encoded_key); } } // Retreat to the last entry with a key <= target void SeekForPrev(const Slice& /*internal_key*/, const char* /*memtable_key*/) override { // not supported assert(false); } // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToFirst() override { if (list_ != nullptr) { iter_.SeekToFirst(); } } // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToLast() override { if (list_ != nullptr) { iter_.SeekToLast(); } } protected: void Reset(Bucket* list) { if (own_list_) { assert(list_ != nullptr); delete list_; } list_ = list; iter_.SetList(list); own_list_ = false; } private: // if list_ is nullptr, we should NEVER call any methods on iter_ // if list_ is nullptr, this Iterator is not Valid() Bucket* list_; Bucket::Iterator iter_; // here we track if we own list_. If we own it, we are also // responsible for it's cleaning. This is a poor man's std::shared_ptr bool own_list_; std::unique_ptr arena_; std::string tmp_; // For passing to EncodeKey }; class DynamicIterator : public HashSkipListRep::Iterator { public: explicit DynamicIterator(const HashSkipListRep& memtable_rep) : HashSkipListRep::Iterator(nullptr, false), memtable_rep_(memtable_rep) {} // Advance to the first entry with a key >= target void Seek(const Slice& k, const char* memtable_key) override { auto transformed = memtable_rep_.transform_->Transform(ExtractUserKey(k)); Reset(memtable_rep_.GetBucket(transformed)); HashSkipListRep::Iterator::Seek(k, memtable_key); } // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToFirst() override { // Prefix iterator does not support total order. // We simply set the iterator to invalid state Reset(nullptr); } // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToLast() override { // Prefix iterator does not support total order. // We simply set the iterator to invalid state Reset(nullptr); } private: // the underlying memtable const HashSkipListRep& memtable_rep_; }; class EmptyIterator : public MemTableRep::Iterator { // This is used when there wasn't a bucket. It is cheaper than // instantiating an empty bucket over which to iterate. public: EmptyIterator() = default; bool Valid() const override { return false; } const char* key() const override { assert(false); return nullptr; } void Next() override {} void Prev() override {} void Seek(const Slice& /*internal_key*/, const char* /*memtable_key*/) override {} void SeekForPrev(const Slice& /*internal_key*/, const char* /*memtable_key*/) override {} void SeekToFirst() override {} void SeekToLast() override {} private: }; }; HashSkipListRep::HashSkipListRep(const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, size_t bucket_size, int32_t skiplist_height, int32_t skiplist_branching_factor) : MemTableRep(allocator), bucket_size_(bucket_size), skiplist_height_(skiplist_height), skiplist_branching_factor_(skiplist_branching_factor), transform_(transform), compare_(compare), allocator_(allocator) { auto mem = allocator->AllocateAligned(sizeof(std::atomic) * bucket_size); buckets_ = new (mem) std::atomic[bucket_size]; for (size_t i = 0; i < bucket_size_; ++i) { buckets_[i].store(nullptr, std::memory_order_relaxed); } } HashSkipListRep::~HashSkipListRep() = default; HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket( const Slice& transformed) { size_t hash = GetHash(transformed); auto bucket = GetBucket(hash); if (bucket == nullptr) { auto addr = allocator_->AllocateAligned(sizeof(Bucket)); bucket = new (addr) Bucket(compare_, allocator_, skiplist_height_, skiplist_branching_factor_); buckets_[hash].store(bucket, std::memory_order_release); } return bucket; } void HashSkipListRep::Insert(KeyHandle handle) { auto* key = static_cast(handle); assert(!Contains(key)); auto transformed = transform_->Transform(UserKey(key)); auto bucket = GetInitializedBucket(transformed); bucket->Insert(key); } bool HashSkipListRep::Contains(const char* key) const { auto transformed = transform_->Transform(UserKey(key)); auto bucket = GetBucket(transformed); if (bucket == nullptr) { return false; } return bucket->Contains(key); } size_t HashSkipListRep::ApproximateMemoryUsage() { return 0; } void HashSkipListRep::Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) { auto transformed = transform_->Transform(k.user_key()); auto bucket = GetBucket(transformed); if (bucket != nullptr) { Bucket::Iterator iter(bucket); for (iter.Seek(k.memtable_key().data()); iter.Valid() && callback_func(callback_args, iter.key()); iter.Next()) { } } } MemTableRep::Iterator* HashSkipListRep::GetIterator(Arena* arena) { // allocate a new arena of similar size to the one currently in use Arena* new_arena = new Arena(allocator_->BlockSize()); auto list = new Bucket(compare_, new_arena); for (size_t i = 0; i < bucket_size_; ++i) { auto bucket = GetBucket(i); if (bucket != nullptr) { Bucket::Iterator itr(bucket); for (itr.SeekToFirst(); itr.Valid(); itr.Next()) { list->Insert(itr.key()); } } } if (arena == nullptr) { return new Iterator(list, true, new_arena); } else { auto mem = arena->AllocateAligned(sizeof(Iterator)); return new (mem) Iterator(list, true, new_arena); } } MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator(Arena* arena) { if (arena == nullptr) { return new DynamicIterator(*this); } else { auto mem = arena->AllocateAligned(sizeof(DynamicIterator)); return new (mem) DynamicIterator(*this); } } struct HashSkipListRepOptions { static const char* kName() { return "HashSkipListRepFactoryOptions"; } size_t bucket_count; int32_t skiplist_height; int32_t skiplist_branching_factor; }; static std::unordered_map hash_skiplist_info = { {"bucket_count", {offsetof(struct HashSkipListRepOptions, bucket_count), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, {"skiplist_height", {offsetof(struct HashSkipListRepOptions, skiplist_height), OptionType::kInt32T, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, {"branching_factor", {offsetof(struct HashSkipListRepOptions, skiplist_branching_factor), OptionType::kInt32T, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, }; class HashSkipListRepFactory : public MemTableRepFactory { public: explicit HashSkipListRepFactory(size_t bucket_count, int32_t skiplist_height, int32_t skiplist_branching_factor) { options_.bucket_count = bucket_count; options_.skiplist_height = skiplist_height; options_.skiplist_branching_factor = skiplist_branching_factor; RegisterOptions(&options_, &hash_skiplist_info); } using MemTableRepFactory::CreateMemTableRep; MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, Logger* logger) override; static const char* kClassName() { return "HashSkipListRepFactory"; } static const char* kNickName() { return "prefix_hash"; } const char* Name() const override { return kClassName(); } const char* NickName() const override { return kNickName(); } private: HashSkipListRepOptions options_; }; } // namespace MemTableRep* HashSkipListRepFactory::CreateMemTableRep( const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, Logger* /*logger*/) { return new HashSkipListRep(compare, allocator, transform, options_.bucket_count, options_.skiplist_height, options_.skiplist_branching_factor); } MemTableRepFactory* NewHashSkipListRepFactory( size_t bucket_count, int32_t skiplist_height, int32_t skiplist_branching_factor) { return new HashSkipListRepFactory(bucket_count, skiplist_height, skiplist_branching_factor); } } // namespace ROCKSDB_NAMESPACE