// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "table/block_based/block_based_table_reader.h" #include #include #include #include "cache/cache_reservation_manager.h" #include "db/db_test_util.h" #include "db/table_properties_collector.h" #include "file/file_util.h" #include "options/options_helper.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/compression_type.h" #include "rocksdb/db.h" #include "rocksdb/file_system.h" #include "rocksdb/options.h" #include "table/block_based/block_based_table_builder.h" #include "table/block_based/block_based_table_factory.h" #include "table/block_based/partitioned_index_iterator.h" #include "table/format.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { class BlockBasedTableReaderBaseTest : public testing::Test { public: static constexpr int kBytesPerEntry = 256; // 16 = (default block size) 4 * 1024 / kBytesPerEntry static constexpr int kEntriesPerBlock = 16; protected: // Prepare key-value pairs to occupy multiple blocks. // Each (key, value) pair is `kBytesPerEntry` byte, every kEntriesPerBlock // pairs constitute 1 block. // If mixed_with_human_readable_string_value == true, // then adjacent blocks contain values with different compression // complexity: human readable strings are easier to compress than random // strings. key is an internal key. // When ts_sz > 0 and `same_key_diff_ts` is true, this // function generate keys with the same user provided key, with different // user defined timestamps and different sequence number to differentiate them static std::vector> GenerateKVMap( int num_block = 2, bool mixed_with_human_readable_string_value = false, size_t ts_sz = 0, bool same_key_diff_ts = false) { std::vector> kv; SequenceNumber seq_no = 0; uint64_t current_udt = 0; if (same_key_diff_ts) { // These numbers are based on the number of keys to create + an arbitrary // buffer number (100) to avoid overflow. current_udt = kEntriesPerBlock * num_block + 100; seq_no = kEntriesPerBlock * num_block + 100; } Random rnd(101); uint32_t key = 0; // To make each (key, value) pair occupy exactly kBytesPerEntry bytes. int value_size = kBytesPerEntry - (8 + static_cast(ts_sz) + static_cast(kNumInternalBytes)); for (int block = 0; block < num_block; block++) { for (int i = 0; i < kEntriesPerBlock; i++) { char k[9] = {0}; // Internal key is constructed directly from this key, // and internal key size is required to be >= 8 bytes, // so use %08u as the format string. snprintf(k, sizeof(k), "%08u", key); std::string v; if (mixed_with_human_readable_string_value) { v = (block % 2) ? rnd.HumanReadableString(value_size) : rnd.RandomString(value_size); } else { v = rnd.RandomString(value_size); } std::string user_key = std::string(k); if (ts_sz > 0) { if (same_key_diff_ts) { PutFixed64(&user_key, current_udt); current_udt -= 1; } else { PutFixed64(&user_key, 0); } } InternalKey internal_key(user_key, seq_no, ValueType::kTypeValue); kv.emplace_back(internal_key.Encode().ToString(), v); if (same_key_diff_ts) { seq_no -= 1; } else { key++; } } } return kv; } void SetUp() override { SetupSyncPointsToMockDirectIO(); test_dir_ = test::PerThreadDBPath("block_based_table_reader_test"); env_ = Env::Default(); fs_ = FileSystem::Default(); ASSERT_OK(fs_->CreateDir(test_dir_, IOOptions(), nullptr)); ConfigureTableFactory(); } virtual void ConfigureTableFactory() = 0; void TearDown() override { EXPECT_OK(DestroyDir(env_, test_dir_)); } // Creates a table with the specificied key value pairs (kv). void CreateTable(const std::string& table_name, const ImmutableOptions& ioptions, const CompressionType& compression_type, const std::vector>& kv, uint32_t compression_parallel_threads = 1, uint32_t compression_dict_bytes = 0) { std::unique_ptr writer; NewFileWriter(table_name, &writer); InternalKeyComparator comparator(ioptions.user_comparator); ColumnFamilyOptions cf_options; cf_options.prefix_extractor = options_.prefix_extractor; MutableCFOptions moptions(cf_options); CompressionOptions compression_opts; compression_opts.parallel_threads = compression_parallel_threads; // Enable compression dictionary and set a buffering limit that is the same // as each block's size. compression_opts.max_dict_bytes = compression_dict_bytes; compression_opts.max_dict_buffer_bytes = compression_dict_bytes; InternalTblPropCollFactories factories; const ReadOptions read_options; const WriteOptions write_options; std::unique_ptr table_builder( options_.table_factory->NewTableBuilder( TableBuilderOptions(ioptions, moptions, read_options, write_options, comparator, &factories, compression_type, compression_opts, 0 /* column_family_id */, kDefaultColumnFamilyName, -1 /* level */), writer.get())); // Build table. for (auto it = kv.begin(); it != kv.end(); it++) { std::string v = it->second; table_builder->Add(it->first, v); } ASSERT_OK(table_builder->Finish()); } void NewBlockBasedTableReader(const FileOptions& foptions, const ImmutableOptions& ioptions, const InternalKeyComparator& comparator, const std::string& table_name, std::unique_ptr* table, bool prefetch_index_and_filter_in_cache = true, Status* status = nullptr, bool user_defined_timestamps_persisted = true) { const MutableCFOptions moptions(options_); TableReaderOptions table_reader_options = TableReaderOptions( ioptions, moptions.prefix_extractor, foptions, comparator, 0 /* block_protection_bytes_per_key */, false /* _skip_filters */, false /* _immortal */, false /* _force_direct_prefetch */, -1 /* _level */, nullptr /* _block_cache_tracer */, 0 /* _max_file_size_for_l0_meta_pin */, "" /* _cur_db_session_id */, 0 /* _cur_file_num */, {} /* _unique_id */, 0 /* _largest_seqno */, 0 /* _tail_size */, user_defined_timestamps_persisted); std::unique_ptr file; NewFileReader(table_name, foptions, &file); uint64_t file_size = 0; ASSERT_OK(env_->GetFileSize(Path(table_name), &file_size)); ReadOptions read_opts; read_opts.verify_checksums = true; std::unique_ptr general_table; Status s = options_.table_factory->NewTableReader( read_opts, table_reader_options, std::move(file), file_size, &general_table, prefetch_index_and_filter_in_cache); if (s.ok()) { table->reset(static_cast(general_table.release())); } if (status) { *status = s; } } std::string Path(const std::string& fname) { return test_dir_ + "/" + fname; } std::string test_dir_; Env* env_; std::shared_ptr fs_; Options options_; private: void WriteToFile(const std::string& content, const std::string& filename) { std::unique_ptr f; ASSERT_OK(fs_->NewWritableFile(Path(filename), FileOptions(), &f, nullptr)); ASSERT_OK(f->Append(content, IOOptions(), nullptr)); ASSERT_OK(f->Close(IOOptions(), nullptr)); } void NewFileWriter(const std::string& filename, std::unique_ptr* writer) { std::string path = Path(filename); EnvOptions env_options; FileOptions foptions; std::unique_ptr file; ASSERT_OK(fs_->NewWritableFile(path, foptions, &file, nullptr)); writer->reset(new WritableFileWriter(std::move(file), path, env_options)); } void NewFileReader(const std::string& filename, const FileOptions& opt, std::unique_ptr* reader) { std::string path = Path(filename); std::unique_ptr f; ASSERT_OK(fs_->NewRandomAccessFile(path, opt, &f, nullptr)); reader->reset(new RandomAccessFileReader(std::move(f), path, env_->GetSystemClock().get())); } }; // Param 1: compression type // Param 2: whether to use direct reads // Param 3: Block Based Table Index type // Param 4: BBTO no_block_cache option // Param 5: test mode for the user-defined timestamp feature // Param 6: number of parallel compression threads // Param 7: CompressionOptions.max_dict_bytes and // CompressionOptions.max_dict_buffer_bytes to enable/disable // compression dictionary. // Param 8: test mode to specify the pattern for generating key / value. When // true, generate keys with the same user provided key, different // user-defined timestamps (if udt enabled), different sequence // numbers. This test mode is used for testing `Get`. When false, // generate keys with different user provided key, same user-defined // timestamps (if udt enabled), same sequence number. This test mode is // used for testing `Get`, `MultiGet`, and `NewIterator`. class BlockBasedTableReaderTest : public BlockBasedTableReaderBaseTest, public testing::WithParamInterface> { protected: void SetUp() override { compression_type_ = std::get<0>(GetParam()); use_direct_reads_ = std::get<1>(GetParam()); test::UserDefinedTimestampTestMode udt_test_mode = std::get<4>(GetParam()); udt_enabled_ = test::IsUDTEnabled(udt_test_mode); persist_udt_ = test::ShouldPersistUDT(udt_test_mode); compression_parallel_threads_ = std::get<5>(GetParam()); compression_dict_bytes_ = std::get<6>(GetParam()); same_key_diff_ts_ = std::get<7>(GetParam()); BlockBasedTableReaderBaseTest::SetUp(); } void ConfigureTableFactory() override { BlockBasedTableOptions opts; opts.index_type = std::get<2>(GetParam()); opts.no_block_cache = std::get<3>(GetParam()); opts.filter_policy.reset(NewBloomFilterPolicy(10, false)); opts.partition_filters = opts.index_type == BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; opts.metadata_cache_options.partition_pinning = PinningTier::kAll; options_.table_factory.reset( static_cast(NewBlockBasedTableFactory(opts))); options_.prefix_extractor = std::shared_ptr(NewFixedPrefixTransform(3)); } CompressionType compression_type_; bool use_direct_reads_; bool udt_enabled_; bool persist_udt_; uint32_t compression_parallel_threads_; uint32_t compression_dict_bytes_; bool same_key_diff_ts_; }; class BlockBasedTableReaderGetTest : public BlockBasedTableReaderTest {}; TEST_P(BlockBasedTableReaderGetTest, Get) { Options options; if (udt_enabled_) { options.comparator = test::BytewiseComparatorWithU64TsWrapper(); } options.persist_user_defined_timestamps = persist_udt_; size_t ts_sz = options.comparator->timestamp_size(); std::vector> kv = BlockBasedTableReaderBaseTest::GenerateKVMap( 100 /* num_block */, true /* mixed_with_human_readable_string_value */, ts_sz, same_key_diff_ts_); std::string table_name = "BlockBasedTableReaderGetTest_Get" + CompressionTypeToString(compression_type_); ImmutableOptions ioptions(options); CreateTable(table_name, ioptions, compression_type_, kv, compression_parallel_threads_, compression_dict_bytes_); std::unique_ptr table; FileOptions foptions; foptions.use_direct_reads = use_direct_reads_; InternalKeyComparator comparator(options.comparator); NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table, true /* prefetch_index_and_filter_in_cache */, nullptr /* status */, persist_udt_); ReadOptions read_opts; ASSERT_OK( table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum)); for (size_t i = 0; i < kv.size(); i += 1) { Slice key = kv[i].first; Slice lkey = key; std::string lookup_ikey; if (udt_enabled_ && !persist_udt_) { // When user-defined timestamps are collapsed to be the minimum timestamp, // we also read with the minimum timestamp to be able to retrieve each // value. ReplaceInternalKeyWithMinTimestamp(&lookup_ikey, key, ts_sz); lkey = lookup_ikey; } // Reading the first entry in a block caches the whole block. if (i % kEntriesPerBlock == 0) { ASSERT_FALSE(table->TEST_KeyInCache(read_opts, lkey.ToString())); } else { ASSERT_TRUE(table->TEST_KeyInCache(read_opts, lkey.ToString())); } PinnableSlice value; GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, ExtractUserKey(key), &value, nullptr, nullptr, nullptr, nullptr, true /* do_merge */, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); ASSERT_OK(table->Get(read_opts, lkey, &get_context, nullptr)); ASSERT_EQ(value.ToString(), kv[i].second); ASSERT_TRUE(table->TEST_KeyInCache(read_opts, lkey.ToString())); } } // Tests MultiGet in both direct IO and non-direct IO mode. // The keys should be in cache after MultiGet. TEST_P(BlockBasedTableReaderTest, MultiGet) { Options options; ReadOptions read_opts; std::string dummy_ts(sizeof(uint64_t), '\0'); Slice read_timestamp = dummy_ts; if (udt_enabled_) { options.comparator = test::BytewiseComparatorWithU64TsWrapper(); read_opts.timestamp = &read_timestamp; } options.persist_user_defined_timestamps = persist_udt_; size_t ts_sz = options.comparator->timestamp_size(); std::vector> kv = BlockBasedTableReaderBaseTest::GenerateKVMap( 100 /* num_block */, true /* mixed_with_human_readable_string_value */, ts_sz); // Prepare keys, values, and statuses for MultiGet. autovector keys; autovector keys_without_timestamps; autovector values; autovector statuses; autovector expected_values; { const int step = static_cast(kv.size()) / MultiGetContext::MAX_BATCH_SIZE; auto it = kv.begin(); for (int i = 0; i < MultiGetContext::MAX_BATCH_SIZE; i++) { keys.emplace_back(it->first); if (ts_sz > 0) { Slice ukey_without_ts = ExtractUserKeyAndStripTimestamp(it->first, ts_sz); keys_without_timestamps.push_back(ukey_without_ts); } else { keys_without_timestamps.emplace_back(ExtractUserKey(it->first)); } values.emplace_back(); statuses.emplace_back(); expected_values.push_back(&(it->second)); std::advance(it, step); } } std::string table_name = "BlockBasedTableReaderTest_MultiGet" + CompressionTypeToString(compression_type_); ImmutableOptions ioptions(options); CreateTable(table_name, ioptions, compression_type_, kv, compression_parallel_threads_, compression_dict_bytes_); std::unique_ptr table; FileOptions foptions; foptions.use_direct_reads = use_direct_reads_; InternalKeyComparator comparator(options.comparator); NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table, true /* bool prefetch_index_and_filter_in_cache */, nullptr /* status */, persist_udt_); ASSERT_OK( table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum)); // Ensure that keys are not in cache before MultiGet. for (auto& key : keys) { ASSERT_FALSE(table->TEST_KeyInCache(read_opts, key.ToString())); } // Prepare MultiGetContext. autovector get_context; autovector key_context; autovector sorted_keys; for (size_t i = 0; i < keys.size(); ++i) { get_context.emplace_back(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, ExtractUserKey(keys[i]), &values[i], nullptr, nullptr, nullptr, nullptr, true /* do_merge */, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); key_context.emplace_back(nullptr, keys_without_timestamps[i], &values[i], nullptr, nullptr, &statuses.back()); key_context.back().get_context = &get_context.back(); } for (auto& key_ctx : key_context) { sorted_keys.emplace_back(&key_ctx); } MultiGetContext ctx(&sorted_keys, 0, sorted_keys.size(), 0, read_opts, fs_.get(), nullptr); // Execute MultiGet. MultiGetContext::Range range = ctx.GetMultiGetRange(); PerfContext* perf_ctx = get_perf_context(); perf_ctx->Reset(); table->MultiGet(read_opts, &range, nullptr); ASSERT_GE(perf_ctx->block_read_count - perf_ctx->index_block_read_count - perf_ctx->filter_block_read_count - perf_ctx->compression_dict_block_read_count, 1); ASSERT_GE(perf_ctx->block_read_byte, 1); for (const Status& status : statuses) { ASSERT_OK(status); } // Check that keys are in cache after MultiGet. for (size_t i = 0; i < keys.size(); i++) { ASSERT_TRUE(table->TEST_KeyInCache(read_opts, keys[i])); ASSERT_EQ(values[i].ToString(), *expected_values[i]); } } TEST_P(BlockBasedTableReaderTest, NewIterator) { Options options; ReadOptions read_opts; std::string dummy_ts(sizeof(uint64_t), '\0'); Slice read_timestamp = dummy_ts; if (udt_enabled_) { options.comparator = test::BytewiseComparatorWithU64TsWrapper(); read_opts.timestamp = &read_timestamp; } options.persist_user_defined_timestamps = persist_udt_; size_t ts_sz = options.comparator->timestamp_size(); std::vector> kv = BlockBasedTableReaderBaseTest::GenerateKVMap( 100 /* num_block */, true /* mixed_with_human_readable_string_value */, ts_sz); std::string table_name = "BlockBasedTableReaderTest_NewIterator" + CompressionTypeToString(compression_type_); ImmutableOptions ioptions(options); CreateTable(table_name, ioptions, compression_type_, kv, compression_parallel_threads_, compression_dict_bytes_); std::unique_ptr table; FileOptions foptions; foptions.use_direct_reads = use_direct_reads_; InternalKeyComparator comparator(options.comparator); NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table, true /* bool prefetch_index_and_filter_in_cache */, nullptr /* status */, persist_udt_); ASSERT_OK( table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum)); std::unique_ptr iter; iter.reset(table->NewIterator( read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized)); // Test forward scan. ASSERT_TRUE(!iter->Valid()); iter->SeekToFirst(); ASSERT_OK(iter->status()); for (auto kv_iter = kv.begin(); kv_iter != kv.end(); kv_iter++) { ASSERT_EQ(iter->key().ToString(), kv_iter->first); ASSERT_EQ(iter->value().ToString(), kv_iter->second); iter->Next(); ASSERT_OK(iter->status()); } ASSERT_TRUE(!iter->Valid()); ASSERT_OK(iter->status()); // Test backward scan. iter->SeekToLast(); ASSERT_OK(iter->status()); for (auto kv_iter = kv.rbegin(); kv_iter != kv.rend(); kv_iter++) { ASSERT_EQ(iter->key().ToString(), kv_iter->first); ASSERT_EQ(iter->value().ToString(), kv_iter->second); iter->Prev(); ASSERT_OK(iter->status()); } ASSERT_TRUE(!iter->Valid()); ASSERT_OK(iter->status()); } class ChargeTableReaderTest : public BlockBasedTableReaderBaseTest, public testing::WithParamInterface< CacheEntryRoleOptions::Decision /* charge_table_reader_mem */> { protected: static std::size_t CalculateMaxTableReaderNumBeforeCacheFull( std::size_t cache_capacity, std::size_t approx_table_reader_mem) { // To make calculation easier for testing assert(cache_capacity % CacheReservationManagerImpl< CacheEntryRole::kBlockBasedTableReader>:: GetDummyEntrySize() == 0 && cache_capacity >= 2 * CacheReservationManagerImpl< CacheEntryRole::kBlockBasedTableReader>:: GetDummyEntrySize()); // We need to subtract 1 for max_num_dummy_entry to account for dummy // entries' overhead, assumed the overhead is no greater than 1 dummy entry // size std::size_t max_num_dummy_entry = (size_t)std::floor(( 1.0 * cache_capacity / CacheReservationManagerImpl< CacheEntryRole::kBlockBasedTableReader>::GetDummyEntrySize())) - 1; std::size_t cache_capacity_rounded_to_dummy_entry_multiples = max_num_dummy_entry * CacheReservationManagerImpl< CacheEntryRole::kBlockBasedTableReader>::GetDummyEntrySize(); std::size_t max_table_reader_num_capped = static_cast( std::floor(1.0 * cache_capacity_rounded_to_dummy_entry_multiples / approx_table_reader_mem)); return max_table_reader_num_capped; } void SetUp() override { // To cache and re-use the same kv map and compression type in the test // suite for elimiating variance caused by these two factors kv_ = BlockBasedTableReaderBaseTest::GenerateKVMap(); compression_type_ = CompressionType::kNoCompression; table_reader_charge_tracking_cache_ = std::make_shared< TargetCacheChargeTrackingCache< CacheEntryRole::kBlockBasedTableReader>>((NewLRUCache( 4 * CacheReservationManagerImpl< CacheEntryRole::kBlockBasedTableReader>::GetDummyEntrySize(), 0 /* num_shard_bits */, true /* strict_capacity_limit */))); // To ApproximateTableReaderMem() without being affected by // the feature of charging its memory, we turn off the feature charge_table_reader_ = CacheEntryRoleOptions::Decision::kDisabled; BlockBasedTableReaderBaseTest::SetUp(); approx_table_reader_mem_ = ApproximateTableReaderMem(); // Now we condtionally turn on the feature to test charge_table_reader_ = GetParam(); ConfigureTableFactory(); } void ConfigureTableFactory() override { BlockBasedTableOptions table_options; table_options.cache_usage_options.options_overrides.insert( {CacheEntryRole::kBlockBasedTableReader, {/*.charged = */ charge_table_reader_}}); table_options.block_cache = table_reader_charge_tracking_cache_; table_options.cache_index_and_filter_blocks = false; table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); table_options.partition_filters = true; table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); } CacheEntryRoleOptions::Decision charge_table_reader_; std::shared_ptr< TargetCacheChargeTrackingCache> table_reader_charge_tracking_cache_; std::size_t approx_table_reader_mem_; std::vector> kv_; CompressionType compression_type_; private: std::size_t ApproximateTableReaderMem() { std::size_t approx_table_reader_mem = 0; std::string table_name = "table_for_approx_table_reader_mem"; ImmutableOptions ioptions(options_); CreateTable(table_name, ioptions, compression_type_, kv_); std::unique_ptr table; Status s; NewBlockBasedTableReader( FileOptions(), ImmutableOptions(options_), InternalKeyComparator(options_.comparator), table_name, &table, false /* prefetch_index_and_filter_in_cache */, &s); assert(s.ok()); approx_table_reader_mem = table->ApproximateMemoryUsage(); assert(approx_table_reader_mem > 0); return approx_table_reader_mem; } }; INSTANTIATE_TEST_CASE_P( ChargeTableReaderTest, ChargeTableReaderTest, ::testing::Values(CacheEntryRoleOptions::Decision::kEnabled, CacheEntryRoleOptions::Decision::kDisabled)); TEST_P(ChargeTableReaderTest, Basic) { const std::size_t max_table_reader_num_capped = ChargeTableReaderTest::CalculateMaxTableReaderNumBeforeCacheFull( table_reader_charge_tracking_cache_->GetCapacity(), approx_table_reader_mem_); // Acceptable estimtation errors coming from // 1. overstimate max_table_reader_num_capped due to # dummy entries is high // and results in metadata charge overhead greater than 1 dummy entry size // (violating our assumption in calculating max_table_reader_num_capped) // 2. overestimate/underestimate max_table_reader_num_capped due to the gap // between ApproximateTableReaderMem() and actual table reader mem std::size_t max_table_reader_num_capped_upper_bound = (std::size_t)(max_table_reader_num_capped * 1.05); std::size_t max_table_reader_num_capped_lower_bound = (std::size_t)(max_table_reader_num_capped * 0.95); std::size_t max_table_reader_num_uncapped = (std::size_t)(max_table_reader_num_capped * 1.1); ASSERT_GT(max_table_reader_num_uncapped, max_table_reader_num_capped_upper_bound) << "We need `max_table_reader_num_uncapped` > " "`max_table_reader_num_capped_upper_bound` to differentiate cases " "between " "charge_table_reader_ == kDisabled and == kEnabled)"; Status s = Status::OK(); std::size_t opened_table_reader_num = 0; std::string table_name; std::vector> tables; ImmutableOptions ioptions(options_); // Keep creating BlockBasedTableReader till hiting the memory limit based on // cache capacity and creation fails (when charge_table_reader_ == // kEnabled) or reaching a specfied big number of table readers (when // charge_table_reader_ == kDisabled) while (s.ok() && opened_table_reader_num < max_table_reader_num_uncapped) { table_name = "table_" + std::to_string(opened_table_reader_num); CreateTable(table_name, ioptions, compression_type_, kv_); tables.push_back(std::unique_ptr()); NewBlockBasedTableReader( FileOptions(), ImmutableOptions(options_), InternalKeyComparator(options_.comparator), table_name, &tables.back(), false /* prefetch_index_and_filter_in_cache */, &s); if (s.ok()) { ++opened_table_reader_num; } } if (charge_table_reader_ == CacheEntryRoleOptions::Decision::kEnabled) { EXPECT_TRUE(s.IsMemoryLimit()) << "s: " << s.ToString(); EXPECT_TRUE(s.ToString().find( kCacheEntryRoleToCamelString[static_cast( CacheEntryRole::kBlockBasedTableReader)]) != std::string::npos); EXPECT_TRUE(s.ToString().find("memory limit based on cache capacity") != std::string::npos); EXPECT_GE(opened_table_reader_num, max_table_reader_num_capped_lower_bound); EXPECT_LE(opened_table_reader_num, max_table_reader_num_capped_upper_bound); std::size_t updated_max_table_reader_num_capped = ChargeTableReaderTest::CalculateMaxTableReaderNumBeforeCacheFull( table_reader_charge_tracking_cache_->GetCapacity() / 2, approx_table_reader_mem_); // Keep deleting BlockBasedTableReader to lower down memory usage from the // memory limit to make the next creation succeeds while (opened_table_reader_num >= updated_max_table_reader_num_capped) { tables.pop_back(); --opened_table_reader_num; } table_name = "table_for_successful_table_reader_open"; CreateTable(table_name, ioptions, compression_type_, kv_); tables.push_back(std::unique_ptr()); NewBlockBasedTableReader( FileOptions(), ImmutableOptions(options_), InternalKeyComparator(options_.comparator), table_name, &tables.back(), false /* prefetch_index_and_filter_in_cache */, &s); EXPECT_TRUE(s.ok()) << s.ToString(); tables.clear(); EXPECT_EQ(table_reader_charge_tracking_cache_->GetCacheCharge(), 0); } else { EXPECT_TRUE(s.ok() && opened_table_reader_num == max_table_reader_num_uncapped) << "s: " << s.ToString() << " opened_table_reader_num: " << std::to_string(opened_table_reader_num); EXPECT_EQ(table_reader_charge_tracking_cache_->GetCacheCharge(), 0); } } class BlockBasedTableReaderTestVerifyChecksum : public BlockBasedTableReaderTest { public: BlockBasedTableReaderTestVerifyChecksum() : BlockBasedTableReaderTest() {} }; TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) { Options options; ReadOptions read_opts; std::string dummy_ts(sizeof(uint64_t), '\0'); Slice read_timestamp = dummy_ts; if (udt_enabled_) { options.comparator = test::BytewiseComparatorWithU64TsWrapper(); read_opts.timestamp = &read_timestamp; } options.persist_user_defined_timestamps = persist_udt_; size_t ts_sz = options.comparator->timestamp_size(); std::vector> kv = BlockBasedTableReaderBaseTest::GenerateKVMap( 800 /* num_block */, false /* mixed_with_human_readable_string_value=*/, ts_sz); options.statistics = CreateDBStatistics(); ImmutableOptions ioptions(options); std::string table_name = "BlockBasedTableReaderTest" + CompressionTypeToString(compression_type_); CreateTable(table_name, ioptions, compression_type_, kv, compression_parallel_threads_, compression_dict_bytes_); std::unique_ptr table; FileOptions foptions; foptions.use_direct_reads = use_direct_reads_; InternalKeyComparator comparator(options.comparator); NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table, true /* bool prefetch_index_and_filter_in_cache */, nullptr /* status */, persist_udt_); // Use the top level iterator to find the offset/size of the first // 2nd level index block and corrupt the block IndexBlockIter iiter_on_stack; BlockCacheLookupContext context{TableReaderCaller::kUserVerifyChecksum}; InternalIteratorBase* iiter = table->NewIndexIterator( read_opts, /*need_upper_bound_check=*/false, &iiter_on_stack, /*get_context=*/nullptr, &context); std::unique_ptr> iiter_unique_ptr; if (iiter != &iiter_on_stack) { iiter_unique_ptr = std::unique_ptr>(iiter); } ASSERT_OK(iiter->status()); iiter->SeekToFirst(); BlockHandle handle = static_cast(iiter) ->index_iter_->value() .handle; table.reset(); // Corrupt the block pointed to by handle ASSERT_OK(test::CorruptFile(options.env, Path(table_name), static_cast(handle.offset()), 128)); NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table, true /* bool prefetch_index_and_filter_in_cache */, nullptr /* status */, persist_udt_); ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT)); Status s = table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum); ASSERT_EQ(1, options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT)); ASSERT_EQ(s.code(), Status::kCorruption); } // Param 1: compression type // Param 2: whether to use direct reads // Param 3: Block Based Table Index type, partitioned filters are also enabled // when index type is kTwoLevelIndexSearch // Param 4: BBTO no_block_cache option // Param 5: test mode for the user-defined timestamp feature // Param 6: number of parallel compression threads // Param 7: CompressionOptions.max_dict_bytes and // CompressionOptions.max_dict_buffer_bytes. This enable/disables // compression dictionary. // Param 8: test mode to specify the pattern for generating key / value pairs. INSTANTIATE_TEST_CASE_P( BlockBasedTableReaderTest, BlockBasedTableReaderTest, ::testing::Combine( ::testing::ValuesIn(GetSupportedCompressions()), ::testing::Bool(), ::testing::Values( BlockBasedTableOptions::IndexType::kBinarySearch, BlockBasedTableOptions::IndexType::kHashSearch, BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch, BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey), ::testing::Values(false), ::testing::ValuesIn(test::GetUDTTestModes()), ::testing::Values(1, 2), ::testing::Values(0, 4096), ::testing::Values(false))); INSTANTIATE_TEST_CASE_P( BlockBasedTableReaderGetTest, BlockBasedTableReaderGetTest, ::testing::Combine( ::testing::ValuesIn(GetSupportedCompressions()), ::testing::Bool(), ::testing::Values( BlockBasedTableOptions::IndexType::kBinarySearch, BlockBasedTableOptions::IndexType::kHashSearch, BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch, BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey), ::testing::Values(false), ::testing::ValuesIn(test::GetUDTTestModes()), ::testing::Values(1, 2), ::testing::Values(0, 4096), ::testing::Values(false, true))); INSTANTIATE_TEST_CASE_P( VerifyChecksum, BlockBasedTableReaderTestVerifyChecksum, ::testing::Combine( ::testing::ValuesIn(GetSupportedCompressions()), ::testing::Values(false), ::testing::Values( BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch), ::testing::Values(true), ::testing::ValuesIn(test::GetUDTTestModes()), ::testing::Values(1, 2), ::testing::Values(0), ::testing::Values(false))); } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); }