From fd0e99e618319974320ed3fb49535aec501be1fb Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 6 Feb 2025 15:56:33 -0500 Subject: Background compaction stuff --- benchmarks/include/standard_benchmarks.h | 12 ++- benchmarks/tail-latency/isam_insert_dist.cpp | 107 ++++++++++++++++++++ benchmarks/tail-latency/standard_latency_dist.cpp | 114 ++++++++++++++++++++++ 3 files changed, 231 insertions(+), 2 deletions(-) create mode 100644 benchmarks/tail-latency/isam_insert_dist.cpp create mode 100644 benchmarks/tail-latency/standard_latency_dist.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 76423ab..2cbe1a8 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -14,6 +14,8 @@ #include "framework/DynamicExtension.h" #include "framework/interface/Query.h" +#include "framework/reconstruction/FixedShardCountPolicy.h" +#include "framework/reconstruction/ReconstructionPolicy.h" #include "query/irs.h" #include "psu-util/progress.h" #include "benchmark_types.h" @@ -23,13 +25,14 @@ #include "framework/reconstruction/TieringPolicy.h" #include "framework/reconstruction/BSMPolicy.h" #include "framework/reconstruction/FloodL0Policy.h" +#include "framework/reconstruction/BackgroundTieringPolicy.h" constexpr double delete_proportion = 0.05; static size_t g_deleted_records = 0; static size_t total = 0; template Q> -de::ReconstructionPolicy *get_policy(size_t scale_factor, size_t buffer_size, int policy=0) { +std::unique_ptr> get_policy(size_t scale_factor, size_t buffer_size, int policy=0, size_t reccnt=0) { de::ReconstructionPolicy *recon = nullptr; @@ -41,9 +44,14 @@ de::ReconstructionPolicy *get_policy(size_t scale_factor, size_t buffer_si recon = new de::BSMPolicy(buffer_size); } else if (policy == 3) { recon = new de::FloodL0Policy(buffer_size); + } else if (policy == 4) { + assert(reccnt > 0); + recon = new de::FixedShardCountPolicy(buffer_size, scale_factor, reccnt); + } else if (policy == 5) { + recon = new de::BackgroundTieringPolicy(scale_factor, buffer_size); } - return recon; + return std::unique_ptr>(recon); } template diff --git a/benchmarks/tail-latency/isam_insert_dist.cpp b/benchmarks/tail-latency/isam_insert_dist.cpp new file mode 100644 index 0000000..88d37c5 --- /dev/null +++ b/benchmarks/tail-latency/isam_insert_dist.cpp @@ -0,0 +1,107 @@ +/* + * + */ + +#include "framework/scheduling/FIFOScheduler.h" +#define ENABLE_TIMER +#define TS_TEST + +#include + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTreeShard; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; +typedef Q::Parameters QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + + auto data = read_sosd_file(d_fname, n); + auto queries = read_range_queries(q_fname, .0001); + + std::vector policies = {3}; + std::vector buffers = {8000, 16000, 32000}; + std::vector sfs = {8}; + + for (size_t l=0; l(sfs[k], buffers[j], policies[l]); + auto extension = new Ext(policy, buffers[j]/4, buffers[j]); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + for (size_t i=0; iinsert(data[i])) { + usleep(1); + } + } + + extension->await_next_epoch(); + + TIMER_INIT(); + + for (size_t i=warmup; iinsert(data[i])) { + usleep(1); + } + TIMER_STOP(); + + //fprintf(stdout, "%ld\t%ld\t%d\t%ld\n", sfs[k], buffers[j], policies[l], TIMER_RESULT()); + } + + extension->await_next_epoch(); + + /* repeat the queries a bunch of times */ + for (size_t l=0; l<10; l++) { + for (size_t i=0; iquery(std::move(q)); + res.get(); + TIMER_STOP(); + + fprintf(stdout, "%ld\t%ld\t%d\t%ld\n", sfs[k], buffers[j], policies[l], TIMER_RESULT()); + } + } + + + QP p = {0, 10000}; + auto res =extension->query(std::move(p)); + + fprintf(stderr, "%ld\n", res.get()); + extension->await_next_epoch(); + delete extension; + }}} + + + fflush(stderr); +} + diff --git a/benchmarks/tail-latency/standard_latency_dist.cpp b/benchmarks/tail-latency/standard_latency_dist.cpp new file mode 100644 index 0000000..2e800fc --- /dev/null +++ b/benchmarks/tail-latency/standard_latency_dist.cpp @@ -0,0 +1,114 @@ +/* + * + */ + +#include "framework/scheduling/SerialScheduler.h" +#include "framework/util/Configuration.h" +#include "util/types.h" +#define ENABLE_TIMER +#define TS_TEST + +#include + +#include "framework/DynamicExtension.h" +#include "framework/scheduling/FIFOScheduler.h" +#include "shard/TrieSpline.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include "framework/reconstruction/FixedShardCountPolicy.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::TrieSpline Shard; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; +typedef Q::Parameters QP; +typedef de::DEConfiguration Conf; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto data = read_sosd_file(d_fname, n); + auto queries = read_range_queries(q_fname, .0001); + + std::vector sfs = {8}; //, 4, 8, 16, 32, 64, 128, 256, 512, 1024}; + size_t buffer_size = 8000; + std::vector policies = {5,}; + + for (auto pol: policies) { + for (size_t i=0; i(sfs[i], buffer_size, pol, n); + auto config = Conf(std::move(policy)); + config.recon_enable_maint_on_flush = true; + config.recon_maint_disabled = false; + config.buffer_flush_trigger = 4000; + + auto extension = new Ext(std::move(config)); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + for (size_t j=0; jinsert(data[j])) { + usleep(1); + } + } + + extension->await_version(); + + TIMER_INIT(); + + TIMER_START(); + for (size_t j=warmup; jinsert(data[j])) { + usleep(1); + fprintf(stderr, "%ld\n", j); + } + } + TIMER_STOP(); + + size_t insert_tput = (double) (n - warmup) / (double) (TIMER_RESULT()) * 1e9; + + extension->await_version(); + + size_t total = 0; + TIMER_START(); + /* repeat the queries a bunch of times */ + for (size_t l=0; l<10; l++) { + for (size_t j=0; jquery(std::move(q)); + total += res.get(); + } + } + TIMER_STOP(); + + size_t query_lat = (double) TIMER_RESULT() / (10*queries.size()); + + fprintf(stdout, "S\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\n", pol, sfs[i], extension->get_height(), extension->get_shard_count(), extension->get_record_count(), total, insert_tput, query_lat); + extension->print_structure(); + delete extension; + } + } + + fflush(stderr); +} + -- cgit v1.2.3