From 6bdcf74ad91e0efaa8c2e4339f5085fde8a7982b Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Sat, 5 Apr 2025 19:08:12 -0400 Subject: working commit (temporary progress, doesn't build) --- CMakeLists.txt | 2 +- benchmarks/include/standard_benchmarks.h | 10 +- benchmarks/tail-latency/insert_query_threads.cpp | 125 ++++++++++----------- benchmarks/tail-latency/mixed_workload_average.cpp | 8 +- benchmarks/tail-latency/standard_latency_dist.cpp | 30 +++-- include/framework/DynamicExtension.h | 14 +-- include/framework/reconstruction/BSMPolicy.h | 10 +- .../reconstruction/BackgroundTieringPolicy.h | 13 ++- include/framework/reconstruction/LevelingPolicy.h | 30 +++-- include/framework/reconstruction/TieringPolicy.h | 13 ++- include/framework/structure/ExtensionStructure.h | 3 + include/framework/structure/InternalLevel.h | 8 +- 12 files changed, 148 insertions(+), 118 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0273b5a..10b564a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True) set(namespace "de") project("Practical Dynamic Extension" VERSION 0.1.0) -set(debug false) +set(debug true) set(tests True) set(bench false) set(vldb_bench false) diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 2cbe1a8..8388fd1 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -32,23 +32,23 @@ static size_t g_deleted_records = 0; static size_t total = 0; template Q> -std::unique_ptr> get_policy(size_t scale_factor, size_t buffer_size, int policy=0, size_t reccnt=0) { +std::unique_ptr> get_policy(size_t scale_factor, size_t buffer_size, int policy=0, size_t reccnt=0, size_t modifier=0) { de::ReconstructionPolicy *recon = nullptr; if (policy == 0) { - recon = new de::TieringPolicy(scale_factor, buffer_size); + recon = new de::TieringPolicy(scale_factor, buffer_size, modifier); } else if (policy == 1) { - recon = new de::LevelingPolicy(scale_factor, buffer_size); + recon = new de::LevelingPolicy(scale_factor, buffer_size, modifier); } else if (policy == 2) { - recon = new de::BSMPolicy(buffer_size); + recon = new de::BSMPolicy(buffer_size, modifier); } else if (policy == 3) { recon = new de::FloodL0Policy(buffer_size); } else if (policy == 4) { assert(reccnt > 0); recon = new de::FixedShardCountPolicy(buffer_size, scale_factor, reccnt); } else if (policy == 5) { - recon = new de::BackgroundTieringPolicy(scale_factor, buffer_size); + recon = new de::BackgroundTieringPolicy(scale_factor, buffer_size, modifier); } return std::unique_ptr>(recon); diff --git a/benchmarks/tail-latency/insert_query_threads.cpp b/benchmarks/tail-latency/insert_query_threads.cpp index 759de5d..1188ce0 100644 --- a/benchmarks/tail-latency/insert_query_threads.cpp +++ b/benchmarks/tail-latency/insert_query_threads.cpp @@ -39,7 +39,7 @@ typedef de::DEConfiguration idx; std::atomic inserts_done = false; -ssize_t query_ratio = 8; +ssize_t query_ratio = 0; std::atomic total_res = 0; size_t reccnt = 0; @@ -72,22 +72,10 @@ void insert_thread(Ext *extension, std::vector *records, size_t start_idx, TIMER_INIT(); TIMER_START(); - for (size_t i=start_idx; iprint_structure(); - while (!extension->insert((*records)[i])) { usleep(1); - //fprintf(stderr, "[D] Failed to insert\n"); } - - //fprintf(stderr, "[D] Record inserted\n"); - - if (extension->get_record_count() != i + 1) { - fprintf(stderr, "[E]: invalid record count %ld %ld\n", extension->get_record_count(), i+1); - extension->print_structure(); - exit(EXIT_FAILURE); - } } TIMER_STOP(); @@ -115,81 +103,88 @@ int main(int argc, char **argv) { std::vector sfs = {8}; //, 4, 8, 16, 32, 64, 128, 256, 512, 1024}; size_t buffer_size = 8000; std::vector policies = { - 5 + 0 }; - std::vector thread_counts = {8}; + std::vector thread_counts = {1}; + std::vector modifiers = {0, 1, 2, 3}; size_t insert_threads = 1; - size_t query_threads = 16; + size_t query_threads = 0; reccnt = n; for (auto pol : policies) { for (auto internal_thread_cnt : thread_counts) { - auto policy = get_policy(sfs[0], buffer_size, pol, n); - auto config = Conf(std::move(policy)); - config.recon_enable_maint_on_flush = true; - config.recon_maint_disabled = false; - //config.buffer_flush_trigger = 4000; - config.maximum_threads = internal_thread_cnt; - - g_thrd_cnt = internal_thread_cnt; - - total_insert_time.store(0); - total_query_time.store(0); - total_query_count.store(0); - - auto extension = new Ext(std::move(config)); - - /* warmup structure w/ 10% of records */ - size_t warmup = 0 * n; - for (size_t k = 0; k < warmup; k++) { - while (!extension->insert(data[k])) { - usleep(1); + for (auto mod : modifiers) { + auto policy = get_policy(sfs[0], buffer_size, pol, n, mod); + auto config = Conf(std::move(policy)); + config.recon_enable_maint_on_flush = true; + config.recon_maint_disabled = false; + // config.buffer_flush_trigger = 4000; + config.maximum_threads = internal_thread_cnt; + + g_thrd_cnt = internal_thread_cnt; + + total_insert_time.store(0); + total_query_time.store(0); + total_query_count.store(0); + + auto extension = new Ext(std::move(config)); + + /* warmup structure w/ 10% of records */ + size_t warmup = 0 * n; + for (size_t k = 0; k < warmup; k++) { + while (!extension->insert(data[k])) { + usleep(1); + } } - } - extension->await_version(); + extension->await_version(); - idx.store(warmup); + idx.store(warmup); - std::thread i_thrds[insert_threads]; - std::thread q_thrds[query_threads]; + std::thread i_thrds[insert_threads]; + std::thread q_thrds[query_threads]; - size_t per_insert_thrd = (n - warmup) / insert_threads; - size_t start = warmup; + size_t per_insert_thrd = (n - warmup) / insert_threads; + size_t start = warmup; - for (size_t i=0; i sfs = {8}; //, 4, 8, 16, 32, 64, 128, 256, 512, 1024}; size_t buffer_size = 8000; - std::vector policies = { - 5 - }; + std::vector policies = { 0, 1, 2}; - std::vector thread_counts = {1, 2, 4, 8, 16, 32}; + std::vector thread_counts = {4}; reccnt = n; @@ -148,7 +146,7 @@ int main(int argc, char **argv) { idx.store(warmup); - size_t thrd_cnt = 8; + size_t thrd_cnt = 1; std::thread thrds[thrd_cnt]; for (size_t i=0; i(d_fname, n); auto queries = read_range_queries(q_fname, .0001); - std::vector sfs = {8}; //, 4, 8, 16, 32, 64, 128, 256, 512, 1024}; + std::vector sfs = {2, 3, 4, 5, 6, 7, 8}; //, 4, 8, 16, 32, 64, 128, 256, 512, 1024}; size_t buffer_size = 8000; - std::vector policies = {0,}; + std::vector policies = {1}; for (auto pol: policies) { for (size_t i=0; iinsert(data[j])) { usleep(1); } + + //fprintf(stderr, "%ld\r", j); } extension->await_version(); + fprintf(stderr, "\n[I] Running Insertion Benchmark\n"); + TIMER_INIT(); + TIMER_START(); for (size_t j=warmup; jinsert(data[j])) { usleep(1); + fprintf(stderr, "insert blocked %ld\r", j); } - TIMER_STOP(); - fprintf(stdout, "I\t%ld\n", TIMER_RESULT()); } + TIMER_STOP(); + auto total_insert_lat = TIMER_RESULT(); + fprintf(stderr, "\n[I] Finished running insertion benchmark\n"); extension->await_version(); + + fprintf(stderr, "[I] Running query benchmark\n"); size_t total = 0; /* repeat the queries a bunch of times */ + TIMER_START(); for (size_t l=0; l<10; l++) { for (size_t j=0; jquery(std::move(q)); total += res.get(); - TIMER_STOP(); - fprintf(stdout, "Q\t%ld\n", TIMER_RESULT()); } } + TIMER_STOP(); + auto total_query_lat = TIMER_RESULT(); + fprintf(stderr, "[I] Finished running query benchmark\n"); + + auto query_latency = total_query_lat / (10*queries.size()); + auto insert_throughput = (size_t) ((double) (n - warmup) / (double) total_insert_lat *1.0e9); + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", pol, sfs[i], n, insert_throughput, query_latency); fprintf(stderr, "%ld\n", total); + fflush(stdout); extension->print_structure(); delete extension; diff --git a/include/framework/DynamicExtension.h b/include/framework/DynamicExtension.h index 762029e..9d76813 100644 --- a/include/framework/DynamicExtension.h +++ b/include/framework/DynamicExtension.h @@ -461,8 +461,8 @@ private: auto extension = (DynamicExtension *)args->extension; extension->SetThreadAffinity(); - // static std::atomic cnt = 0; - // size_t recon_id = cnt.fetch_add(1); + static std::atomic cnt = 0; + size_t recon_id = cnt.fetch_add(1); size_t new_head = 0; std::vector> reconstructions; @@ -570,7 +570,7 @@ private: args->version->set_structure(std::move(std::unique_ptr( active_version->get_structure()->copy()))); - // size_t cur_reccnt = args->version->get_structure()->get_record_count(); + size_t cur_reccnt = args->version->get_structure()->get_record_count(); /* apply our updates to the copied structure (adding/removing shards) */ for (auto recon : reconstructions) { @@ -582,7 +582,7 @@ private: } } - // size_t new_reccnt = args->version->get_structure()->get_record_count(); + size_t new_reccnt = args->version->get_structure()->get_record_count(); // fprintf(stderr, "\t[I] Post-reconstruction L0 Size\t%ld (%ld)\n", // args->version->get_structure()->get_level_vector()[0]->get_shard_count(), @@ -595,9 +595,9 @@ private: active_version->get_head()); // fprintf(stderr, "\t[I] Buffer head set to %ld (%ld)\n", // active_version->get_head(), recon_id); - // if (new_reccnt != cur_reccnt) { - // fprintf(stderr, "ERROR: invalid reccnt (%ld)\n", recon_id); - // } + if (new_reccnt != cur_reccnt) { + fprintf(stderr, "ERROR: invalid reccnt (%ld)\n", recon_id); + } } // fprintf(stderr, "\t[I] Record Counts: %ld %ld %ld (%ld)\n", old_reccnt, diff --git a/include/framework/reconstruction/BSMPolicy.h b/include/framework/reconstruction/BSMPolicy.h index 6d55a12..ae17182 100644 --- a/include/framework/reconstruction/BSMPolicy.h +++ b/include/framework/reconstruction/BSMPolicy.h @@ -21,8 +21,8 @@ class BSMPolicy : public ReconstructionPolicy { LevelVector; public: - BSMPolicy(size_t buffer_size) - : m_scale_factor(2), m_buffer_size(buffer_size) {} + BSMPolicy(size_t buffer_size, size_t scale_factor, size_t modifier=0) + : m_scale_factor(scale_factor), m_buffer_size(buffer_size), m_size_modifier(modifier) {} std::vector get_reconstruction_tasks(const Version *version, LockManager &lock_mngr) const override { @@ -79,11 +79,13 @@ private: return target_level; } - inline size_t capacity(level_index level) const { - return m_buffer_size * pow(m_scale_factor, level + 1); + inline size_t capacity(level_index level, size_t reccnt) const { + size_t base = m_scale_factor * pow(log(reccnt), m_size_modifier); + return m_buffer_size * (base - 1) * pow(base, level + 1); } size_t m_scale_factor; size_t m_buffer_size; + size_t m_size_modifier; }; } // namespace de diff --git a/include/framework/reconstruction/BackgroundTieringPolicy.h b/include/framework/reconstruction/BackgroundTieringPolicy.h index ab19e24..36556a2 100644 --- a/include/framework/reconstruction/BackgroundTieringPolicy.h +++ b/include/framework/reconstruction/BackgroundTieringPolicy.h @@ -21,8 +21,8 @@ class BackgroundTieringPolicy : public ReconstructionPolicy get_reconstruction_tasks( const Version *version, LockManager &lock_mngr) const override { @@ -34,7 +34,7 @@ public: return {}; } - level_index target_level = find_reconstruction_target(levels); + level_index target_level = find_reconstruction_target(levels, version->get_structure()->get_record_count()); assert(target_level != -1); level_index source_level = 0; @@ -68,11 +68,11 @@ public: } private: - level_index find_reconstruction_target(LevelVector &levels) const { + level_index find_reconstruction_target(LevelVector &levels, size_t reccnt) const { level_index target_level = invalid_level_idx; for (level_index i = 1; i < (level_index)levels.size(); i++) { - if (levels[i]->get_shard_count() + 1 <= capacity()) { + if (levels[i]->get_shard_count() + 1 <= capacity(reccnt)) { target_level = i; break; } @@ -81,9 +81,10 @@ private: return target_level; } - inline size_t capacity() const { return m_scale_factor; } + inline size_t capacity(size_t reccnt) const { return m_scale_factor * std::pow(std::log(reccnt), m_size_modifier); } size_t m_scale_factor; size_t m_buffer_size; + size_t m_size_modifier; }; } // namespace de diff --git a/include/framework/reconstruction/LevelingPolicy.h b/include/framework/reconstruction/LevelingPolicy.h index f0feb53..d448fee 100644 --- a/include/framework/reconstruction/LevelingPolicy.h +++ b/include/framework/reconstruction/LevelingPolicy.h @@ -21,8 +21,8 @@ class LevelingPolicy : public ReconstructionPolicy { LevelVector; public: - LevelingPolicy(size_t scale_factor, size_t buffer_size) - : m_scale_factor(scale_factor), m_buffer_size(buffer_size) {} + LevelingPolicy(size_t scale_factor, size_t buffer_size, size_t modifier=0) + : m_scale_factor(scale_factor), m_buffer_size(buffer_size), m_size_modifier(modifier) {} std::vector get_reconstruction_tasks(const Version *version, LockManager &lock_mngr) const override { @@ -34,7 +34,16 @@ public: ReconstructionVector reconstructions; auto levels = version->get_structure()->get_level_vector(); - level_index target_level = find_reconstruction_target(levels); + /* + * on the very first flush, the whole structure will be empty, so + * there isn't anything to merge into level 1 from level 0 yet. + */ + if (version->get_structure()->get_record_count() == 0) { + return reconstructions; + } + + level_index target_level = find_reconstruction_target(levels, version->get_structure()->get_record_count()); + assert(target_level != -1); level_index source_level = 0; if (target_level == invalid_level_idx) { @@ -49,20 +58,22 @@ public: (i == 0) ? m_buffer_size + target_reccnt : levels[i - 1]->get_record_count() + target_reccnt; - reconstructions.add_reconstruction(i - 1, i, total_reccnt, - ReconstructionType::Merge); + if (total_reccnt > 0) { + auto type = (i >= (level_index) levels.size()) ? ReconstructionType::Append : ReconstructionType::Merge; + reconstructions.add_reconstruction(i - 1, i, total_reccnt, type); + } } return reconstructions; } private: - level_index find_reconstruction_target(LevelVector &levels) const { + level_index find_reconstruction_target(LevelVector &levels, size_t reccnt) const { level_index target_level = invalid_level_idx; size_t incoming_records = m_buffer_size; for (level_index i = 1; i < (level_index)levels.size(); i++) { - if (levels[i]->get_record_count() + incoming_records < capacity(i)) { + if (levels[i]->get_record_count() + incoming_records < capacity(i, reccnt)) { target_level = i; break; } @@ -73,11 +84,12 @@ private: return target_level; } - inline size_t capacity(level_index level) const { - return m_buffer_size * pow(m_scale_factor, level); + inline size_t capacity(level_index level, size_t reccnt) const { + return m_buffer_size * pow(m_scale_factor * pow(std::log(reccnt), m_size_modifier), level); } size_t m_scale_factor; size_t m_buffer_size; + size_t m_size_modifier; }; } // namespace de diff --git a/include/framework/reconstruction/TieringPolicy.h b/include/framework/reconstruction/TieringPolicy.h index ce8130e..55f5f47 100644 --- a/include/framework/reconstruction/TieringPolicy.h +++ b/include/framework/reconstruction/TieringPolicy.h @@ -21,8 +21,8 @@ class TieringPolicy : public ReconstructionPolicy { LevelVector; public: - TieringPolicy(size_t scale_factor, size_t buffer_size) - : m_scale_factor(scale_factor), m_buffer_size(buffer_size) {} + TieringPolicy(size_t scale_factor, size_t buffer_size, size_t modifier=0) + : m_scale_factor(scale_factor), m_buffer_size(buffer_size), m_size_modifier(modifier) {} std::vector get_reconstruction_tasks( const Version *version, LockManager &lock_mngr) const override { @@ -34,7 +34,7 @@ public: ReconstructionVector reconstructions; auto levels = version->get_structure()->get_level_vector(); - level_index target_level = find_reconstruction_target(levels); + level_index target_level = find_reconstruction_target(levels, version->get_structure()->get_record_count()); assert(target_level != -1); level_index source_level = 0; @@ -60,11 +60,11 @@ public: } private: - level_index find_reconstruction_target(LevelVector &levels) const { + level_index find_reconstruction_target(LevelVector &levels, size_t reccnt) const { level_index target_level = invalid_level_idx; for (level_index i = 1; i < (level_index)levels.size(); i++) { - if (levels[i]->get_shard_count() + 1 <= capacity()) { + if (levels[i]->get_shard_count() + 1 <= capacity(reccnt)) { target_level = i; break; } @@ -73,9 +73,10 @@ private: return target_level; } - inline size_t capacity() const { return m_scale_factor; } + inline size_t capacity(size_t reccnt) const { return m_scale_factor * std::pow(std::log(reccnt), m_size_modifier); } size_t m_scale_factor; size_t m_buffer_size; + size_t m_size_modifier; }; } // namespace de diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index 521e68b..a77088f 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -231,6 +231,9 @@ public: if (shard_idx != -1) { m_levels[shards[i].first]->delete_shard(shard_idx); + } else { + fprintf(stderr, "ERROR: failed to delete shard %ld\t%p\n", shards[i].first, shards[i].second); + //exit(EXIT_FAILURE); } } } diff --git a/include/framework/structure/InternalLevel.h b/include/framework/structure/InternalLevel.h index 6e8b67e..5659c72 100644 --- a/include/framework/structure/InternalLevel.h +++ b/include/framework/structure/InternalLevel.h @@ -102,8 +102,12 @@ public: return false; } - const ShardType *get_shard(size_t idx) const { - if (idx >= m_shards.size()) { + const ShardType *get_shard(ssize_t idx) const { + if (idx == all_shards_idx) { + idx = 0; + } + + if (idx >= (ssize_t) m_shards.size()) { return nullptr; } -- cgit v1.2.3