/** * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. * * See file LICENSE for terms. */ #include extern "C" { #include #include } #include #include #include #include class test_pgtable : public ucs::test { protected: typedef std::vector search_result_t; virtual void init() { ucs::test::init(); ucs_status_t status = ucs_pgtable_init(&m_pgtable, pgd_alloc, pgd_free); ASSERT_UCS_OK(status); } virtual void cleanup() { ucs_pgtable_cleanup(&m_pgtable); ucs::test::cleanup(); } void insert(ucs_pgt_region_t *region, ucs_status_t exp_status = UCS_OK, const std::string& message = "") { ucs_status_t status = ucs_pgtable_insert(&m_pgtable, region); if (exp_status == UCS_OK) { ASSERT_UCS_OK(status, << " inserting 0x" << std::hex << region->start << "..0x" <end); } else { EXPECT_EQ(exp_status, status) << message; } } void remove(ucs_pgt_region_t *region, ucs_status_t exp_status = UCS_OK, const std::string& message = "") { ucs_status_t status = ucs_pgtable_remove(&m_pgtable, region); if (exp_status == UCS_OK) { ASSERT_UCS_OK(status); } else { EXPECT_EQ(exp_status, status) << message; } } ucs_pgt_region_t *lookup(ucs_pgt_addr_t address) { return ucs_pgtable_lookup(&m_pgtable, address); } unsigned num_regions() { return ucs_pgtable_num_regions(&m_pgtable); } void dump() { ucs_pgtable_dump(&m_pgtable, UCS_LOG_LEVEL_DEBUG); } void purge() { ucs_pgtable_purge(&m_pgtable, pgd_purge_cb, reinterpret_cast(this)); } search_result_t search(ucs_pgt_addr_t from, ucs_pgt_addr_t to) { search_result_t result; ucs_pgtable_search_range(&m_pgtable, from, to, pgd_search_cb, reinterpret_cast(&result)); return result; } static ucs_pgt_region_t* make_region(ucs_pgt_addr_t start, ucs_pgt_addr_t end) { ucs_pgt_region_t r = {start, end}; return new ucs_pgt_region_t(r); } static bool is_overlap(const ucs_pgt_region_t *region, ucs_pgt_addr_t from, ucs_pgt_addr_t to) { return ucs_max(from, region->start) <= ucs_min(to, region->end); } static unsigned count_overlap(const ucs::ptr_vector& regions, ucs_pgt_addr_t from, ucs_pgt_addr_t to) { unsigned count = 0; for (ucs::ptr_vector::const_iterator iter = regions.begin(); iter != regions.end(); ++iter) { if (is_overlap(*iter, from, to)) { ++count; } } return count; } void test_search_region(const ucs_pgt_region_t ®ion) { search_result_t result; result = search(region.start, region.end - 1); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion, result.front()); result = search(region.start, region.end); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion, result.front()); result = search(region.start, region.end + 1); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion, result.front()); } private: static ucs_pgt_dir_t *pgd_alloc(const ucs_pgtable_t *pgtable) { return new ucs_pgt_dir_t; } static void pgd_free(const ucs_pgtable_t *pgtable, ucs_pgt_dir_t *pgdir) { delete pgdir; } static void pgd_purge_cb(const ucs_pgtable_t *pgtable, ucs_pgt_region_t *region, void *arg) { } static void pgd_search_cb(const ucs_pgtable_t *pgtable, ucs_pgt_region_t *region, void *arg) { search_result_t *result = reinterpret_cast(arg); result->push_back(region); } protected: ucs_pgtable_t m_pgtable; }; UCS_TEST_F(test_pgtable, basic) { ucs_pgt_region_t region; region.start = 0x400800; region.end = 0x403400; insert(®ion); dump(); EXPECT_EQ(®ion, lookup(0x400800)); EXPECT_EQ(®ion, lookup(0x402020)); EXPECT_EQ(®ion, lookup(0x4033ff)); EXPECT_TRUE(NULL == lookup(0x403400)); EXPECT_TRUE(NULL == lookup(0x0)); EXPECT_TRUE(NULL == lookup(std::numeric_limits::max())); EXPECT_EQ(1u, num_regions()); remove(®ion); insert(®ion); dump(); purge(); /* region goes out of scope so we must remove it */ } UCS_TEST_F(test_pgtable, lookup_adjacent) { ucs_pgt_region_t region1 = {0xc500000, 0xc500400}; ucs_pgt_region_t region2 = {0xc500400, 0xc500800}; insert(®ion1); insert(®ion2); dump(); EXPECT_EQ(®ion2, lookup(0xc500400)); EXPECT_EQ(®ion1, lookup(0xc500000)); purge(); } UCS_TEST_F(test_pgtable, multi_search) { for (int count = 0; count < 10; ++count) { ucs::ptr_vector regions; ucs_pgt_addr_t min = std::numeric_limits::max(); ucs_pgt_addr_t max = 0; /* generate random regions */ unsigned num_regions = 0; for (int i = 0; i < 200 / ucs::test_time_multiplier(); ++i) { ucs_pgt_addr_t start = (ucs::rand() & 0x7fffffff) << 24; size_t size = ucs_min((size_t)ucs::rand(), std::numeric_limits::max() - start); ucs_pgt_addr_t end = start + ucs_align_down(size, UCS_PGT_ADDR_ALIGN); if (count_overlap(regions, start, end)) { /* Make sure regions do not overlap */ continue; } min = ucs_min(start, min); max = ucs_max(start, max); regions.push_back(make_region(start, end)); ++num_regions; } /* Insert regions */ for (ucs::ptr_vector::const_iterator iter = regions.begin(); iter != regions.end(); ++iter) { insert(*iter); } /* Count how many fall in the [1/4, 3/4] range */ ucs_pgt_addr_t from = ((min * 90) + (max * 10)) / 100; ucs_pgt_addr_t to = ((min * 10) + (max * 90)) / 100; unsigned num_in_range = count_overlap(regions, from, to); /* Search in page table */ search_result_t result = search(from, to); UCS_TEST_MESSAGE << "found " << result.size() << "/" << num_in_range << " regions in the range 0x" << std::hex << from << "..0x" << to << std::dec; EXPECT_EQ(num_in_range, result.size()); purge(); } } UCS_TEST_SKIP_COND_F(test_pgtable, invalid_param, (UCS_PGT_ADDR_ALIGN == 1)) { ucs_pgt_region_t region1 = {0x4000, 0x4001}; insert(®ion1, UCS_ERR_INVALID_PARAM); ucs_pgt_region_t region2 = {0x4001, 0x5000}; insert(®ion2, UCS_ERR_INVALID_PARAM); ucs_pgt_region_t region3 = {0x5000, 0x4000}; insert(®ion3, UCS_ERR_INVALID_PARAM); } UCS_TEST_F(test_pgtable, overlap_insert) { ucs_pgt_region_t region1 = {0x4000, 0x6000}; insert(®ion1); ucs_pgt_region_t region2 = {0x5000, 0x7000}; insert(®ion2, UCS_ERR_ALREADY_EXISTS, "overlap"); ucs_pgt_region_t region3 = {0x3000, 0x5000}; insert(®ion3, UCS_ERR_ALREADY_EXISTS, "overlap"); remove(®ion1); } UCS_TEST_F(test_pgtable, nonexist_remove) { ucs_pgt_region_t region1 = {0x4000, 0x6000}; remove(®ion1, UCS_ERR_NO_ELEM); ucs_pgt_region_t region2 = {0x5000, 0x7000}; insert(®ion2); remove(®ion1, UCS_ERR_NO_ELEM); region1.start = 0x5000; region1.end = 0x5000; remove(®ion1, UCS_ERR_NO_ELEM); region1 = region2; remove(®ion1, UCS_ERR_NO_ELEM); /* Fail - should be pointer-equal */ remove(®ion2); } UCS_TEST_F(test_pgtable, search_large_region) { ucs_pgt_region_t region = {0x3c03cb00, 0x3c03f600}; insert(®ion, UCS_OK); search_result_t result; result = search(0x36990000, 0x3c810000); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion, result.front()); result = search(region.start - 1, region.start); EXPECT_EQ(1u, result.size()); result = search(region.start, region.start + 1); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion, result.front()); result = search(region.end - 1, region.end); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion, result.front()); result = search(region.end, region.end + 1); EXPECT_EQ(0u, result.size()); remove(®ion); } UCS_TEST_F(test_pgtable, search_non_contig_regions) { const size_t region_size = UCS_BIT(28); size_t start, end; // insert [0x7f6ef0000000 .. 0x7f6f00000000] start = 0x7f6ef0000000; end = start + region_size; ucs_pgt_region_t region1 = {start, end}; insert(®ion1, UCS_OK); // insert [0x7f6f2c021000 .. 0x7f6f3c021000] start = 0x7f6f2c021000; end = start + region_size; ucs_pgt_region_t region2 = {start, end}; insert(®ion2, UCS_OK); // insert [0x7f6f42000000 .. 0x7f6f52000000] start = 0x7f6f42000000; end = start + region_size; ucs_pgt_region_t region3 = {start, end}; insert(®ion3, UCS_OK); search_result_t result; // search the 1st region test_search_region(region1); // search the 2nd region test_search_region(region2); // search the 3rd region test_search_region(region3); remove(®ion1); remove(®ion2); remove(®ion3); } UCS_TEST_F(test_pgtable, search_adjacent_regions) { const size_t region_size = UCS_BIT(28); size_t start, end; // insert [0x7f6ef0000000 .. 0x7f6f00000000] start = 0x7f6ef0000000; end = start + region_size; ucs_pgt_region_t region1 = {start, end}; insert(®ion1, UCS_OK); // insert [0x7f6f00000000 .. 0x7f6f10000000] start = end; end = start + region_size; ucs_pgt_region_t region2 = {region1.end, 0x7f6f40000000}; insert(®ion2, UCS_OK); // insert [0x7f6f10000000 .. 0x7f6f20000000] start = end; end = start + region_size; ucs_pgt_region_t region3 = {region2.end, 0x7f6f48000000}; insert(®ion3, UCS_OK); search_result_t result; // search the 1st region result = search(region1.start, region1.end - 1); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion1, result.front()); result = search(region1.start, region1.end); EXPECT_EQ(2u, result.size()); EXPECT_EQ(®ion1, result.front()); result = search(region1.start, region1.end + 1); EXPECT_EQ(2u, result.size()); EXPECT_EQ(®ion1, result.front()); // search the 2nd region result = search(region2.start, region2.end - 1); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion2, result.front()); result = search(region2.start, region2.end); EXPECT_EQ(2u, result.size()); EXPECT_EQ(®ion2, result.front()); result = search(region2.start, region2.end + 1); EXPECT_EQ(2u, result.size()); EXPECT_EQ(®ion2, result.front()); // search the 3rd region result = search(region3.start, region3.end - 1); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion3, result.front()); result = search(region3.start, region3.end); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion3, result.front()); result = search(region3.start, region3.end + 1); EXPECT_EQ(1u, result.size()); EXPECT_EQ(®ion3, result.front()); remove(®ion1); remove(®ion2); remove(®ion3); } class test_pgtable_perf : public test_pgtable { protected: void insert(ucs_pgt_region_t *region) { /* Insert to both */ test_pgtable::insert(region); m_stl_pgt.insert(region); } void purge() { test_pgtable::purge(); m_stl_pgt.clear(); } ucs_pgt_region_t* lookup_in_stl(ucs_pgt_addr_t address) { ucs_pgt_region_t search = {address, address + 1}; stl_pgtable_t::iterator iter = m_stl_pgt.lower_bound(&search); if (iter == m_stl_pgt.end()) { return NULL; } else { ucs_pgt_region_t *region = *iter; EXPECT_LT(address, region->end) << std::hex << "address=" << address << " region " << region->start << ".." << region->end << std::dec; return (address >= region->start) ? region : NULL; } } ucs_pgt_region_t* lookup_in_pgt(ucs_pgt_addr_t address) { return test_pgtable::lookup(address); } void measure_workload(ucs_pgt_addr_t max_addr, size_t block_size, /* Basic block size */ unsigned blocks_per_superblock, /* Number of consecutive basic blocks per big block */ unsigned num_superblocks, /* Number of big blocks */ unsigned num_lookups, /* How many lookups to generate */ bool random_access, /* Whether access pattern is random or ordered */ double hit_ratio) /* Probability of lookup hit */ { block_size = ucs_align_up_pow2(block_size, UCS_PGT_ADDR_ALIGN); const size_t superblock_size = block_size * blocks_per_superblock; const size_t max_start = max_addr - superblock_size; ucs::ptr_vector regions; std::vector lookups; lookups.clear(); /* Generate random superblocks */ ucs_pgt_addr_t start = 0; std::vector superblocks; for (unsigned i = 0; i < num_superblocks; ++i) { ucs_pgt_addr_t addr = random_address(start, max_start); superblocks.push_back(addr); start = addr + superblock_size * 2; /* minimal gap */ if (start >= max_start) { break; } } num_superblocks = superblocks.size(); /* Insert them */ for (unsigned i = 0; i < num_superblocks; ++i) { for (unsigned j = 0; j < blocks_per_superblock; ++j) { ucs_pgt_region_t *region = new ucs_pgt_region_t; region->start = superblocks[i] + (j * block_size); region->end = region->start + block_size; regions.push_back(region); insert(region); } } EXPECT_EQ(num_superblocks * blocks_per_superblock, num_regions()); /* Create workload */ unsigned sb_idx = 0; unsigned block_idx = 0; for (unsigned n = 0; n < num_lookups; ++n) { ucs_pgt_addr_t addr = superblocks[sb_idx] + block_idx * block_size; if (ucs::rand() > (RAND_MAX * hit_ratio)) { addr += superblock_size; /* make it miss by falling to inter-block gap */ } lookups.push_back(addr); if (random_access) { sb_idx = ucs::rand() % num_superblocks; block_idx = ucs::rand() % blocks_per_superblock; } else { block_idx = (block_idx + 1) % blocks_per_superblock; if (block_idx == 0) sb_idx = (sb_idx + 1) % num_superblocks; } } invalidate_cache(); std::pair result_stl = measure(lookups, true); invalidate_cache(); std::pair result_pgt = measure(lookups, false); EXPECT_EQ(result_stl.second, result_pgt.second); UCS_TEST_MESSAGE << std::dec << num_superblocks << " areas of " << blocks_per_superblock << "x" << block_size << " bytes, " << (random_access ? "random" : "ordered") << ": " << "stl: " << (ucs_time_to_nsec(result_stl.first) / num_lookups) << " ns, " "ucs: " << (ucs_time_to_nsec(result_pgt.first) / num_lookups) << " ns " << (result_pgt.second * 100) / lookups.size() << "% hit" ; purge(); } private: struct region_comparator { bool operator()(ucs_pgt_region_t *region1, ucs_pgt_region_t *region2) const { return region1->end <= region2->start; } }; typedef std::set stl_pgtable_t; std::pair inline measure(const std::vector& lookups, bool use_stl) { unsigned hit_count = 0; ucs_time_t start_time = ucs_get_time(); ucs_compiler_fence(); for (std::vector::const_iterator iter = lookups.begin(); iter != lookups.end(); ++iter) { ucs_pgt_region_t *region = use_stl ? lookup_in_stl(*iter) : lookup_in_pgt(*iter); if (region != NULL) { ++hit_count; } } ucs_compiler_fence(); return std::make_pair(ucs_get_time() - start_time, hit_count); } ucs_pgt_addr_t random_address(ucs_pgt_addr_t start, ucs_pgt_addr_t max) { ucs_pgt_addr_t r = (ucs_pgt_addr_t)ucs::rand() * (max / 1000) / RAND_MAX; return ucs_align_up_pow2((r % (max - start)) + start, UCS_PGT_ADDR_ALIGN); } void invalidate_cache() { size_t size = 30 * 1024 * 1024; void *ptr = malloc(size); memset(ptr, 0xbb, size); free(ptr); } stl_pgtable_t m_stl_pgt; }; /* * Compare out lookup performance to STL's */ UCS_TEST_F(test_pgtable_perf, basic) { ucs_pgt_region_t region = {0x4000, 0x5000}; insert(®ion); EXPECT_EQ(®ion, lookup_in_stl(0x4500)); EXPECT_EQ(®ion, lookup_in_stl(0x4000)); EXPECT_EQ(®ion, lookup_in_pgt(0x4500)); EXPECT_TRUE(NULL == lookup_in_stl(0x5000)); purge(); } UCS_TEST_SKIP_COND_F(test_pgtable_perf, workloads, (ucs::test_time_multiplier() != 1)) { measure_workload(UCS_MASK(28), 1024, 10000, 20, 5000000, false, 0.8); measure_workload(UCS_MASK(28), 1024, 10000, 20, 500000, true, 0.8); measure_workload(UCS_MASK(28), 1024, 10000, 2, 10000000, false, 0.8); measure_workload(UCS_MASK(28), 1024 * 256, 1, 4, 10000000, false, 0.8); }