From cd6231c89643d450f538c6063fb759b3bfcea924 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 5 Jun 2023 11:15:48 -0400 Subject: MemISAM tests + bugfixes --- tests/memisam_tests.cpp | 420 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 290 insertions(+), 130 deletions(-) (limited to 'tests') diff --git a/tests/memisam_tests.cpp b/tests/memisam_tests.cpp index 260b47c..dd4ce72 100644 --- a/tests/memisam_tests.cpp +++ b/tests/memisam_tests.cpp @@ -1,220 +1,380 @@ +/* + * tests/irs_tests.cpp + * + * Unit tests for MemISAM (Augmented B+Tree) shard + * + * Copyright (C) 2023 Douglas Rumbaugh + * Dong Xie + * + * All rights reserved. Published under the Modified BSD License. + * + */ #include "shard/MemISAM.h" -#include "framework/InternalLevel.h" -#include "util/bf_config.h" #include "testing.h" #include using namespace de; -typedef MemISAM M_ISAM; +typedef MemISAM Shard; -START_TEST(t_memtable_init) +START_TEST(t_mbuffer_init) { - auto buffer = new MutableBuffer(1024, true, 512, g_rng); + auto buffer = new MutableBuffer(1024, true, 1024); for (uint64_t i = 512; i > 0; i--) { - buffer->append(Rec {i, (uint32_t)i}); + uint32_t v = i; + buffer->append({i,v, 1}); } - Rec r; - r.set_tombstone(); for (uint64_t i = 1; i <= 256; ++i) { - r.key = i; - r.value = i; - buffer->append(r); + uint32_t v = i; + buffer->append({i, v, 1}, true); } for (uint64_t i = 257; i <= 512; ++i) { - buffer->append({i, (uint32_t) i+1}); + uint32_t v = i + 1; + buffer->append({i, v, 1}); } - BloomFilter* bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS, g_rng); - M_ISAM* run = new M_ISAM(buffer, bf); - ck_assert_uint_eq(run->get_record_count(), 512); + Shard* shard = new Shard(buffer); + ck_assert_uint_eq(shard->get_record_count(), 512); - delete bf; delete buffer; - delete run; + delete shard; } -START_TEST(t_inmemrun_init) + +START_TEST(t_irs_init) { size_t n = 512; - auto memtable1 = create_test_mbuffer(n); - auto memtable2 = create_test_mbuffer(n); - auto memtable3 = create_test_mbuffer(n); + auto mbuffer1 = create_test_mbuffer(n); + auto mbuffer2 = create_test_mbuffer(n); + auto mbuffer3 = create_test_mbuffer(n); - BloomFilter* bf1 = new BloomFilter(100, BF_HASH_FUNCS, g_rng); - BloomFilter* bf2 = new BloomFilter(100, BF_HASH_FUNCS, g_rng); - BloomFilter* bf3 = new BloomFilter(100, BF_HASH_FUNCS, g_rng); - auto run1 = new M_ISAM(memtable1, bf1); - auto run2 = new M_ISAM(memtable2, bf2); - auto run3 = new M_ISAM(memtable3, bf3); + auto shard1 = new Shard(mbuffer1); + auto shard2 = new Shard(mbuffer2); + auto shard3 = new Shard(mbuffer3); - BloomFilter* bf4 = new BloomFilter(100, BF_HASH_FUNCS, g_rng); - M_ISAM* runs[3] = {run1, run2, run3}; - auto run4 = new M_ISAM(runs, 3, bf4); + Shard* shards[3] = {shard1, shard2, shard3}; + auto shard4 = new Shard(shards, 3); - ck_assert_int_eq(run4->get_record_count(), n * 3); - ck_assert_int_eq(run4->get_tombstone_count(), 0); + ck_assert_int_eq(shard4->get_record_count(), n * 3); + ck_assert_int_eq(shard4->get_tombstone_count(), 0); size_t total_cnt = 0; - size_t run1_idx = 0; - size_t run2_idx = 0; - size_t run3_idx = 0; - - for (size_t i = 0; i < run4->get_record_count(); ++i) { - auto rec1 = run1->get_record_at(run1_idx); - auto rec2 = run2->get_record_at(run2_idx); - auto rec3 = run3->get_record_at(run3_idx); - - auto cur_rec = run4->get_record_at(i); - - if (run1_idx < n && cur_rec == rec1) { - ++run1_idx; - } else if (run2_idx < n && cur_rec == rec2) { - ++run2_idx; - } else if (run3_idx < n && cur_rec == rec3) { - ++run3_idx; + size_t shard1_idx = 0; + size_t shard2_idx = 0; + size_t shard3_idx = 0; + + for (size_t i = 0; i < shard4->get_record_count(); ++i) { + auto rec1 = shard1->get_record_at(shard1_idx); + auto rec2 = shard2->get_record_at(shard2_idx); + auto rec3 = shard3->get_record_at(shard3_idx); + + auto cur_rec = shard4->get_record_at(i); + + if (shard1_idx < n && cur_rec->rec == rec1->rec) { + ++shard1_idx; + } else if (shard2_idx < n && cur_rec->rec == rec2->rec) { + ++shard2_idx; + } else if (shard3_idx < n && cur_rec->rec == rec3->rec) { + ++shard3_idx; } else { assert(false); } } - delete memtable1; - delete memtable2; - delete memtable3; - - delete bf1; - delete run1; - delete bf2; - delete run2; - delete bf3; - delete run3; - delete bf4; - delete run4; + delete mbuffer1; + delete mbuffer2; + delete mbuffer3; + + delete shard1; + delete shard2; + delete shard3; + delete shard4; } -START_TEST(t_get_lower_bound_index) +START_TEST(t_point_lookup) { size_t n = 10000; - auto memtable = create_double_seq_mbuffer(n); - - ck_assert_ptr_nonnull(memtable); - BloomFilter* bf = new BloomFilter(100, BF_HASH_FUNCS, g_rng); - M_ISAM* run = new M_ISAM(memtable, bf); - ck_assert_int_eq(run->get_record_count(), n); - ck_assert_int_eq(run->get_tombstone_count(), 0); + auto buffer = create_double_seq_mbuffer(n, false); + auto isam = Shard(buffer); - auto tbl_records = memtable->sorted_output(); for (size_t i=0; iget_record_at(i); - auto pos = run->get_lower_bound(tbl_rec->key); - ck_assert_int_eq(run->get_record_at(pos)->key, tbl_rec->key); - ck_assert_int_le(pos, i); + Rec r; + auto rec = (buffer->get_data() + i); + r.key = rec->rec.key; + r.value = rec->rec.value; + + auto result = isam.point_lookup(r); + ck_assert_ptr_nonnull(result); + ck_assert_int_eq(result->rec.key, r.key); + ck_assert_int_eq(result->rec.value, r.value); } - delete memtable; - delete bf; - delete run; + delete buffer; } +END_TEST + -START_TEST(t_get_upper_bound_index) +START_TEST(t_point_lookup_miss) { size_t n = 10000; - auto memtable = create_double_seq_mbuffer(n); - ck_assert_ptr_nonnull(memtable); - BloomFilter* bf = new BloomFilter(100, BF_HASH_FUNCS, g_rng); - M_ISAM* run = new M_ISAM(memtable, bf); + auto buffer = create_double_seq_mbuffer(n, false); + auto isam = Shard(buffer); - ck_assert_int_eq(run->get_record_count(), n); - ck_assert_int_eq(run->get_tombstone_count(), 0); + for (size_t i=n + 100; i<2*n; i++) { + Rec r; + r.key = i; + r.value = i; - auto tbl_records = memtable->sorted_output(); - for (size_t i=0; iget_record_at(i); - auto pos = run->get_upper_bound(tbl_rec->key); - ck_assert(pos == run->get_record_count() || - run->get_record_at(pos)->key > tbl_rec->key); - ck_assert_int_ge(pos, i); + auto result = isam.point_lookup(r); + ck_assert_ptr_null(result); } - delete memtable; - delete bf; - delete run; + delete buffer; } START_TEST(t_full_cancelation) { size_t n = 100; - auto mtable = create_double_seq_mbuffer(n, false); - auto mtable_ts = create_double_seq_mbuffer(n, true); - BloomFilter* bf1 = new BloomFilter(100, BF_HASH_FUNCS, g_rng); - BloomFilter* bf2 = new BloomFilter(100, BF_HASH_FUNCS, g_rng); - BloomFilter* bf3 = new BloomFilter(100, BF_HASH_FUNCS, g_rng); + auto buffer = create_double_seq_mbuffer(n, false); + auto buffer_ts = create_double_seq_mbuffer(n, true); - M_ISAM* run = new M_ISAM(mtable, bf1); - M_ISAM* run_ts = new M_ISAM(mtable_ts, bf2); + Shard* shard = new Shard(buffer); + Shard* shard_ts = new Shard(buffer_ts); - ck_assert_int_eq(run->get_record_count(), n); - ck_assert_int_eq(run->get_tombstone_count(), 0); - ck_assert_int_eq(run_ts->get_record_count(), n); - ck_assert_int_eq(run_ts->get_tombstone_count(), n); + ck_assert_int_eq(shard->get_record_count(), n); + ck_assert_int_eq(shard->get_tombstone_count(), 0); + ck_assert_int_eq(shard_ts->get_record_count(), n); + ck_assert_int_eq(shard_ts->get_tombstone_count(), n); - M_ISAM* runs[] = {run, run_ts}; + Shard* shards[] = {shard, shard_ts}; - M_ISAM* merged = new M_ISAM(runs, 2, bf3); + Shard* merged = new Shard(shards, 2); ck_assert_int_eq(merged->get_tombstone_count(), 0); ck_assert_int_eq(merged->get_record_count(), 0); - delete mtable; - delete mtable_ts; - delete bf1; - delete bf2; - delete bf3; - delete run; - delete run_ts; + delete buffer; + delete buffer_ts; + delete shard; + delete shard_ts; delete merged; } END_TEST + +START_TEST(t_irs_query) +{ + size_t n=1000; + auto buffer = create_double_seq_mbuffer(n); + auto isam = Shard(buffer); + + uint64_t lower_key = 100; + uint64_t upper_key = 250; + + size_t k = 100; + + size_t cnt[3] = {0}; + irs_query_parms parms = {lower_key, upper_key, k}; + parms.rng = gsl_rng_alloc(gsl_rng_mt19937); + + size_t total_samples = 0; + + for (size_t i=0; i<1000; i++) { + auto state = IRSQuery::get_query_state(&isam, &parms); + auto result = IRSQuery::query(&isam, state, &parms); + + ck_assert_int_eq(result.size(), k); + + for (auto &rec : result) { + ck_assert_int_le(rec.rec.key, upper_key); + ck_assert_int_ge(rec.rec.key, lower_key); + } + + IRSQuery::delete_query_state(state); + } + + gsl_rng_free(parms.rng); + delete buffer; +} +END_TEST + + +template +std::vector strip_wrapping(std::vector> vec) { + std::vector out(vec.size()); + for (size_t i=0; i(n); + + Shard shard = Shard(buffer); + + uint64_t lower_key = 100; + uint64_t upper_key = 250; + + size_t k = 1000; + + size_t cnt[3] = {0}; + irs_query_parms parms = {lower_key, upper_key, k}; + parms.rng = gsl_rng_alloc(gsl_rng_mt19937); + + std::vector> results(2); + + for (size_t i=0; i<1000; i++) { + auto state1 = IRSQuery::get_query_state(&shard, &parms); + results[0] = strip_wrapping(IRSQuery::query(&shard, state1, &parms)); + + auto state2 = IRSQuery::get_query_state(&shard, &parms); + results[1] = strip_wrapping(IRSQuery::query(&shard, state2, &parms)); + + IRSQuery::delete_query_state(state1); + IRSQuery::delete_query_state(state2); + } + + auto merged = IRSQuery::merge(results); + + ck_assert_int_eq(merged.size(), 2*k); + for (size_t i=0; i(n); + + uint64_t lower_key = 100; + uint64_t upper_key = 250; + + size_t k = 100; + + size_t cnt[3] = {0}; + irs_query_parms parms = {lower_key, upper_key, k}; + parms.rng = gsl_rng_alloc(gsl_rng_mt19937); + + size_t total_samples = 0; + + for (size_t i=0; i<1000; i++) { + auto state = IRSQuery::get_buffer_query_state(buffer, &parms); + auto result = IRSQuery::buffer_query(buffer, state, &parms); + + ck_assert_int_eq(result.size(), k); + + for (auto &rec : result) { + ck_assert_int_le(rec.rec.key, upper_key); + ck_assert_int_ge(rec.rec.key, lower_key); + } + + IRSQuery::delete_buffer_query_state(state); + } + + gsl_rng_free(parms.rng); + delete buffer; +} +END_TEST + + +START_TEST(t_irs_buffer_query_rejection) +{ + size_t n=1000; + auto buffer = create_double_seq_mbuffer(n); + + uint64_t lower_key = 100; + uint64_t upper_key = 250; + + size_t k = 10000; + + size_t cnt[3] = {0}; + irs_query_parms parms = {lower_key, upper_key, k}; + parms.rng = gsl_rng_alloc(gsl_rng_mt19937); + + size_t total_samples = 0; + + for (size_t i=0; i<1000; i++) { + auto state = IRSQuery::get_buffer_query_state(buffer, &parms); + auto result = IRSQuery::buffer_query(buffer, state, &parms); + + ck_assert_int_gt(result.size(), 0); + ck_assert_int_le(result.size(), k); + + for (auto &rec : result) { + ck_assert_int_le(rec.rec.key, upper_key); + ck_assert_int_ge(rec.rec.key, lower_key); + } + + IRSQuery::delete_buffer_query_state(state); + } + + gsl_rng_free(parms.rng); + delete buffer; +} +END_TEST + + Suite *unit_testing() { - Suite *unit = suite_create("M_ISAM Unit Testing"); + Suite *unit = suite_create("MemISAM Shard Unit Testing"); - TCase *create = tcase_create("lsm::M_ISAM constructor Testing"); - tcase_add_test(create, t_memtable_init); - tcase_add_test(create, t_inmemrun_init); + TCase *create = tcase_create("de::MemISAM constructor Testing"); + tcase_add_test(create, t_mbuffer_init); + tcase_add_test(create, t_irs_init); tcase_set_timeout(create, 100); suite_add_tcase(unit, create); - TCase *bounds = tcase_create("lsm::M_ISAM::get_{lower,upper}_bound Testing"); - tcase_add_test(bounds, t_get_lower_bound_index); - tcase_add_test(bounds, t_get_upper_bound_index); - tcase_set_timeout(bounds, 100); - suite_add_tcase(unit, bounds); - TCase *tombstone = tcase_create("lsm::M_ISAM::tombstone cancellation Testing"); + TCase *tombstone = tcase_create("de:MemISAM::tombstone cancellation Testing"); tcase_add_test(tombstone, t_full_cancelation); suite_add_tcase(unit, tombstone); + TCase *lookup = tcase_create("de:MemISAM:point_lookup Testing"); + tcase_add_test(lookup, t_point_lookup); + tcase_add_test(lookup, t_point_lookup_miss); + suite_add_tcase(unit, lookup); + + + TCase *sampling = tcase_create("de:MemISAM::MemISAMQuery Testing"); + tcase_add_test(sampling, t_irs_query); + tcase_add_test(sampling, t_irs_query_merge); + tcase_add_test(sampling, t_irs_buffer_query_rejection); + tcase_add_test(sampling, t_irs_buffer_query_scan); + tcase_set_timeout(sampling, 100); + suite_add_tcase(unit, sampling); + return unit; } -int run_unit_tests() + +int shard_unit_tests() { int failed = 0; Suite *unit = unit_testing(); - SRunner *unit_runner = srunner_create(unit); + SRunner *unit_shardner = srunner_create(unit); - srunner_run_all(unit_runner, CK_NORMAL); - failed = srunner_ntests_failed(unit_runner); - srunner_free(unit_runner); + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); return failed; } @@ -222,7 +382,7 @@ int run_unit_tests() int main() { - int unit_failed = run_unit_tests(); + int unit_failed = shard_unit_tests(); return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; } -- cgit v1.2.3