From 911eb0ef61dc7d327507c6406120a80797190884 Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Tue, 12 Aug 2025 13:25:50 -0400 Subject: file_util.h: Swapped from random_shuffle to shuffle --- benchmarks/include/file_util.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 2d340e6..ef7e464 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -214,7 +215,9 @@ static std::vector generate_uniform(size_t n) { records[i].value = i; } - std::random_shuffle(records.begin(), records.end()); + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(records.begin(), records.end(), g); return records; } -- cgit v1.2.3 From 601481ed0a8061a372900cfb6761e8de81651339 Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Thu, 14 Aug 2025 09:09:44 -0400 Subject: Per record cost estimation progress --- CMakeLists.txt | 5 ++ .../tail-latency/isam_construction_times.cpp | 87 ++++++++++++++++++++++ include/framework/scheduling/statistics.h | 11 ++- include/util/types.h | 7 +- 4 files changed, 102 insertions(+), 8 deletions(-) create mode 100644 benchmarks/tail-latency/isam_construction_times.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a7bcad6..31cdc50 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -348,6 +348,11 @@ if (tail_bench) target_include_directories(stall_benchmark_vptree PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(stall_benchmark_vptree PUBLIC -mcx16) + add_executable(isam_construction_times ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/tail-latency/isam_construction_times.cpp) + target_link_libraries(isam_construction_times PUBLIC gsl pthread atomic) + target_include_directories(isam_construction_times PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) + target_link_options(isam_construction_times PUBLIC -mcx16) + add_executable(selectivity_sweep ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/tail-latency/selectivity_sweep.cpp) target_link_libraries(selectivity_sweep PUBLIC gsl pthread atomic) target_include_directories(selectivity_sweep PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) diff --git a/benchmarks/tail-latency/isam_construction_times.cpp b/benchmarks/tail-latency/isam_construction_times.cpp new file mode 100644 index 0000000..ecf085c --- /dev/null +++ b/benchmarks/tail-latency/isam_construction_times.cpp @@ -0,0 +1,87 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include "framework/scheduling/FIFOScheduler.h" +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" +#include "framework/util/Configuration.h" + +#include + + +typedef de::Record Rec; +typedef de::ISAMTreeShard; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; +typedef Q::Parameters QP; +typedef de::DEConfiguration + Conf; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + + + auto data = read_sosd_file(d_fname, n); + + std::vector policies = {6}; + std::vector buffers = {12000}; + std::vector sfs = {8}; + + for (size_t l=0; l(sfs[k], buffers[j], policies[l]); + auto config = Conf(std::move(policy)); + config.recon_enable_maint_on_flush = true; + config.recon_maint_disabled = false; + // config.buffer_flush_trigger = 4000; + config.maximum_threads = 6; + auto extension = new Ext(std::move(config)); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + for (size_t i=0; iinsert(data[i])) { + usleep(1); + } + } + + extension->await_version(); + + for (size_t i=warmup; iinsert(data[i])) { + usleep(1); + } + } + + extension->await_version(); + + extension->print_scheduler_statistics(); + delete extension; + + }}} + + + fflush(stderr); +} + diff --git a/include/framework/scheduling/statistics.h b/include/framework/scheduling/statistics.h index 34699f1..6d9f9f0 100644 --- a/include/framework/scheduling/statistics.h +++ b/include/framework/scheduling/statistics.h @@ -118,10 +118,9 @@ public: size_t first_query = UINT64_MAX; - /* hard-coded for the moment to only consider queries */ for (auto &job : m_jobs) { if (job.second.type != 1) { - continue; + fprintf(stdout, "%ld %ld %ld %ld\n", job.second.id, job.second.size, job.second.runtime(), job.second.runtime() / (job.second.size)); } if (job.first < first_query) { @@ -152,8 +151,8 @@ public: } - int64_t average_queue_time = total_queue_time / query_cnt; - int64_t average_runtime = total_runtime / query_cnt; + int64_t average_queue_time = (query_cnt) ? total_queue_time / query_cnt : 0; + int64_t average_runtime = (query_cnt) ? total_runtime / query_cnt : 0; /* calculate standard deviations */ int64_t queue_deviation_sum = 0; @@ -168,8 +167,8 @@ public: } - int64_t queue_stddev = std::sqrt(queue_deviation_sum / query_cnt); - int64_t runtime_stddev = std::sqrt(runtime_deviation_sum / query_cnt); + int64_t queue_stddev = (query_cnt) ? std::sqrt(queue_deviation_sum / query_cnt) : 0; + int64_t runtime_stddev = (query_cnt) ? std::sqrt(runtime_deviation_sum / query_cnt) : 0; fprintf(stdout, "Query Count: %ld\tWorst Query: %ld\tFirst Query: %ld\n", query_cnt, worst_query, first_query); diff --git a/include/util/types.h b/include/util/types.h index c10f7ff..88774f5 100644 --- a/include/util/types.h +++ b/include/util/types.h @@ -115,7 +115,7 @@ public: size_t reccnt, ReconstructionType type) { m_tasks.push_back({std::move(shards), target, reccnt, type}); - + total_reccnt += reccnt; } void add_reconstruction(level_index source, level_index target, @@ -130,7 +130,10 @@ public: total_reccnt += reccnt; } - void add_reconstruction(ReconstructionTask task) { m_tasks.push_back(task); } + void add_reconstruction(ReconstructionTask task) { + m_tasks.push_back(task); + total_reccnt += task.reccnt; + } size_t get_total_reccnt() { return total_reccnt; } -- cgit v1.2.3